diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000000000..2ad7fdc1efa0a
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,11 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    open-pull-requests-limit: 100
+    labels:
+      - "dependencies"
+      - "github-actions"
+      - "domain:ci"
diff --git a/.github/workflows/LabelCheck.yml b/.github/workflows/LabelCheck.yml
index 0ff56f4b9dfdc..c966e478e3fe0 100644
--- a/.github/workflows/LabelCheck.yml
+++ b/.github/workflows/LabelCheck.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     timeout-minutes: 2
     steps:
-    - uses: yogevbd/enforce-label-action@2.2.2
+    - uses: yogevbd/enforce-label-action@a3c219da6b8fa73f6ba62b68ff09c469b3a1c024 # 2.2.2
       with:
         # REQUIRED_LABELS_ANY: "bug,enhancement,skip-changelog"
         # REQUIRED_LABELS_ANY_DESCRIPTION: "Select at least one label ['bug','enhancement','skip-changelog']"
diff --git a/.github/workflows/Typos.yml b/.github/workflows/Typos.yml
index da5a6a550abe8..6c9eeacc21800 100644
--- a/.github/workflows/Typos.yml
+++ b/.github/workflows/Typos.yml
@@ -11,11 +11,11 @@ jobs:
     timeout-minutes: 5
     steps:
       - name: Checkout the JuliaLang/julia repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           persist-credentials: false
       - name: Check spelling with typos
-        #uses: crate-ci/typos@master
+        #uses: crate-ci/typos@c7af4712eda24dd1ef54bd8212973888489eb0ce # v1.23.5
         env:
           GH_TOKEN: "${{ github.token }}"
         run: |
diff --git a/.github/workflows/Whitespace.yml b/.github/workflows/Whitespace.yml
new file mode 100644
index 0000000000000..5706f6148dc33
--- /dev/null
+++ b/.github/workflows/Whitespace.yml
@@ -0,0 +1,23 @@
+name: Whitespace
+
+permissions: {}
+
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+
+jobs:
+  whitespace:
+    name: Check whitespace
+    runs-on: ubuntu-latest
+    timeout-minutes: 2
+    steps:
+      - name: Checkout the JuliaLang/julia repository
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
+        with:
+          persist-credentials: false
+      - name: Check whitespace
+        run: |
+          contrib/check-whitespace.jl
diff --git a/.github/workflows/cffconvert.yml b/.github/workflows/cffconvert.yml
index 751476865ae4c..4c9debb246f3f 100644
--- a/.github/workflows/cffconvert.yml
+++ b/.github/workflows/cffconvert.yml
@@ -23,11 +23,11 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Check out a copy of the repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
         with:
           persist-credentials: false
 
       - name: Check whether the citation metadata from CITATION.cff is valid
-        uses: citation-file-format/cffconvert-github-action@2.0.0
+        uses: citation-file-format/cffconvert-github-action@4cf11baa70a673bfdf9dad0acc7ee33b3f4b6084 # 2.0.0
         with:
           args: "--validate"
diff --git a/.gitignore b/.gitignore
index 524a12d066c4d..80bdd67619454 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@
 .DS_Store
 .idea/*
 .vscode/*
+.zed/*
 *.heapsnapshot
 .cache
 # Buildkite: Ignore the entire .buildkite directory
diff --git a/HISTORY.md b/HISTORY.md
index 12433ea5976fc..7fb01c7e9a0e9 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -74,11 +74,13 @@ Multi-threading changes
 -----------------------
 
 * `Threads.@threads` now supports the `:greedy` scheduler, intended for non-uniform workloads ([#52096]).
-* A new exported struct `Lockable{T, L<:AbstractLock}` makes it easy to bundle a resource and its lock together ([#52898]).
+* A new public (but unexported) struct `Base.Lockable{T, L<:AbstractLock}` makes it easy to bundle a resource and its lock together ([#52898]).
 
 Build system changes
 --------------------
 
+* There is a new `Makefile` to build Julia and LLVM using the profile-guided and link-time optimizations (PGO and LTO) strategies, see `contrib/pgo-lto/Makefile` ([#45641]).
+
 New library functions
 ---------------------
 
@@ -128,12 +130,6 @@ Standard library changes
 * The new `@styled_str` string macro provides a convenient way of creating a
   `AnnotatedString` with various faces or other attributes applied ([#49586]).
 
-#### JuliaSyntaxHighlighting
-
-* A new standard library for applying syntax highlighting to Julia code, this
-  uses `JuliaSyntax` and `StyledStrings` to implement a `highlight` function
-  that creates an `AnnotatedString` with syntax highlighting applied.
-
 #### Package Manager
 
 #### LinearAlgebra
@@ -420,6 +416,7 @@ Deprecated or removed
 [#44247]: https://github.com/JuliaLang/julia/issues/44247
 [#45164]: https://github.com/JuliaLang/julia/issues/45164
 [#45396]: https://github.com/JuliaLang/julia/issues/45396
+[#45641]: https://github.com/JuliaLang/julia/issues/45641
 [#45962]: https://github.com/JuliaLang/julia/issues/45962
 [#46196]: https://github.com/JuliaLang/julia/issues/46196
 [#46372]: https://github.com/JuliaLang/julia/issues/46372
diff --git a/Make.inc b/Make.inc
index 5064aa393ec95..039755ce34098 100644
--- a/Make.inc
+++ b/Make.inc
@@ -86,7 +86,7 @@ HAVE_SSP := 0
 WITH_GC_VERIFY := 0
 WITH_GC_DEBUG_ENV := 0
 
-# MMTk GC
+# Use MMTk GC
 WITH_MMTK ?= 0
 
 # Enable DTrace support
@@ -95,6 +95,9 @@ WITH_DTRACE := 0
 # Enable ITTAPI integration
 WITH_ITTAPI := 0
 
+# Enable NVTX integration
+WITH_NVTX := 0
+
 # Enable Tracy support
 WITH_TRACY := 0
 WITH_TRACY_CALLSTACKS := 0
@@ -495,7 +498,7 @@ MACOSX_VERSION_MIN := 11.0
 endif
 endif
 
-JCFLAGS_COMMON    := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64
+JCFLAGS_COMMON    := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -Wformat -Wformat-security
 JCFLAGS_CLANG     := $(JCFLAGS_COMMON)
 JCFLAGS_GCC       := $(JCFLAGS_COMMON) -fno-gnu-unique
 
@@ -504,7 +507,7 @@ JCPPFLAGS_COMMON  := -fasynchronous-unwind-tables
 JCPPFLAGS_CLANG   := $(JCPPFLAGS_COMMON) -mllvm -enable-tail-merge=0
 JCPPFLAGS_GCC     := $(JCPPFLAGS_COMMON) -fno-tree-tail-merge
 
-JCXXFLAGS_COMMON  := -pipe $(fPIC) -fno-rtti -std=c++17
+JCXXFLAGS_COMMON  := -pipe $(fPIC) -fno-rtti -std=c++17 -Wformat -Wformat-security
 JCXXFLAGS_CLANG   := $(JCXXFLAGS_COMMON) -pedantic
 JCXXFLAGS_GCC     := $(JCXXFLAGS_COMMON) -fno-gnu-unique
 
@@ -516,6 +519,11 @@ SHIPFLAGS_COMMON  := -O3
 SHIPFLAGS_CLANG   := $(SHIPFLAGS_COMMON) -g
 SHIPFLAGS_GCC     := $(SHIPFLAGS_COMMON) -ggdb2 -falign-functions
 
+BOLT_LDFLAGS :=
+
+BOLT_CFLAGS_GCC    :=
+BOLT_CFLAGS_CLANG  :=
+
 ifeq ($(OS), Darwin)
 JCPPFLAGS_CLANG   += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1
 endif
@@ -532,7 +540,8 @@ JCFLAGS    := $(JCFLAGS_GCC)
 JCPPFLAGS  := $(JCPPFLAGS_GCC)
 JCXXFLAGS  := $(JCXXFLAGS_GCC)
 DEBUGFLAGS := $(DEBUGFLAGS_GCC)
-SHIPFLAGS  := $(SHIPFLAGS_GCC)
+SHIPFLAGS  := $(SHIPFLAGS_GCC) $(BOLT_CFLAGS_GCC)
+BOLT_CFLAGS  := $(BOLT_CFLAGS_GCC)
 endif
 
 ifeq ($(USECLANG),1)
@@ -542,7 +551,8 @@ JCFLAGS    := $(JCFLAGS_CLANG)
 JCPPFLAGS  := $(JCPPFLAGS_CLANG)
 JCXXFLAGS  := $(JCXXFLAGS_CLANG)
 DEBUGFLAGS := $(DEBUGFLAGS_CLANG)
-SHIPFLAGS  := $(SHIPFLAGS_CLANG)
+SHIPFLAGS  := $(SHIPFLAGS_CLANG) $(BOLT_CFLAGS_CLANG)
+BOLT_CFLAGS  := $(BOLT_CFLAGS_CLANG)
 
 ifeq ($(OS), Darwin)
 CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN)
@@ -849,6 +859,11 @@ JCXXFLAGS += -DUSE_TIMING_COUNTS
 JCFLAGS += -DUSE_TIMING_COUNTS
 endif
 
+ifeq ($(WITH_NVTX), 1)
+JCXXFLAGS += -DUSE_NVTX
+JCFLAGS += -DUSE_NVTX
+endif
+
 # ===========================================================================
 
 # Select the cpu architecture to target, or automatically detects the user's compiler
@@ -981,6 +996,15 @@ BINARY:=64
 MARCH=
 endif
 
+# Allow Clang to use CRC instructions (only applicable on AArch64)
+ifneq (,$(findstring aarch64,$(ARCH)))
+ifeq ($(USECLANG),1)
+ifeq (,$(MARCH))
+JCFLAGS += -mcrc
+endif
+endif
+endif
+
 # If we are running on powerpc64 or ppc64, fail out dramatically
 ifneq (,$(filter $(ARCH), powerpc64 ppc64))
 $(error Big-endian PPC64 is not supported, to ignore this error, set ARCH=ppc64le)
@@ -1112,20 +1136,13 @@ LIBUNWIND:=
 else ifneq ($(DISABLE_LIBUNWIND), 0)
 LIBUNWIND:=
 else
-ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
-ifneq ($(OS),Darwin)
 LIBUNWIND:=-lunwind
-# Only for linux since we want to use not yet released libunwind features
-JCFLAGS+=-DSYSTEM_LIBUNWIND
-JCPPFLAGS+=-DSYSTEM_LIBUNWIND
-endif
-else
 ifneq ($(findstring $(OS),Darwin OpenBSD),)
-LIBUNWIND:=-lunwind
 JCPPFLAGS+=-DLLVMLIBUNWIND
-else
-LIBUNWIND:=-lunwind
-endif
+else ifeq ($(USE_SYSTEM_LIBUNWIND), 1)
+# Only for linux and freebsd since we want to use not yet released gnu libunwind features
+JCFLAGS+=-DSYSTEM_LIBUNWIND
+JCPPFLAGS+=-DSYSTEM_LIBUNWIND
 endif
 endif
 
@@ -1319,7 +1336,7 @@ CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.33|GLIBCXX_3\.5\.|GLIBCXX_4\.
 # Note: we explicitly _do not_ define `CSL` here, since it requires some more
 # advanced techniques to decide whether it should be installed from a BB source
 # or not.  See `deps/csl.mk` for more detail.
-BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT
+BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP LLD LIBTRACYCLIENT BOLT
 define SET_BB_DEFAULT
 # First, check to see if BB is disabled on a global setting
 ifeq ($$(USE_BINARYBUILDER),0)
@@ -1425,9 +1442,16 @@ OSLIBS += -lelf -lkvm -lrt -lpthread -latomic
 # make it loaded first to
 # prevent from linking to outdated system libs.
 # See #21788
+# TODO: Determine whether the condition here on AArch64 (added in #55089) should actually
+# be `ifneq ($(USE_BINARYBUILDER),0)`. We vendor a correctly versioned libgcc_s when using
+# BinaryBuilder which we want to link in early as noted above, but it could be the case
+# that without BinaryBuilder, regardless of architecture, we need to delay linking libgcc_s
+# to avoid getting the system one.
+ifeq (,$(findstring aarch64,$(ARCH)))
 OSLIBS += -lgcc_s
+endif
 
-OSLIBS += -Wl,--export-dynamic -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \
+OSLIBS += -Wl,--export-dynamic -Wl,--undefined-version -Wl,--version-script=$(BUILDROOT)/src/julia.expmap \
 	$(NO_WHOLE_ARCHIVE)
 endif
 
diff --git a/Makefile b/Makefile
index 072a96b9fd362..4fd8b878c5d1f 100644
--- a/Makefile
+++ b/Makefile
@@ -82,7 +82,7 @@ julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia
 julia-stdlib: | $(DIRS) julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/stdlib
 
-julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl
+julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl $(build_datarootdir)/julia/juliac.jl $(build_datarootdir)/julia/juliac-buildscript.jl
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/base
 
 julia-libccalltest: julia-deps
@@ -130,7 +130,7 @@ check-whitespace:
 ifneq ($(NO_GIT), 1)
 	@# Append the directory containing the julia we just built to the end of `PATH`,
 	@# to give us the best chance of being able to run this check.
-	@PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" $(JULIA_EXECUTABLE) $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl)
+	@PATH="$(PATH):$(dir $(JULIA_EXECUTABLE))" julia $(call cygpath_w,$(JULIAHOME)/contrib/check-whitespace.jl)
 else
 	$(warn "Skipping whitespace check because git is unavailable")
 endif
@@ -181,7 +181,7 @@ $(build_sysconfdir)/julia/startup.jl: $(JULIAHOME)/etc/startup.jl | $(build_sysc
 	@echo Creating usr/etc/julia/startup.jl
 	@cp $< $@
 
-$(build_datarootdir)/julia/julia-config.jl: $(JULIAHOME)/contrib/julia-config.jl | $(build_datarootdir)/julia
+$(build_datarootdir)/julia/%: $(JULIAHOME)/contrib/% | $(build_datarootdir)/julia
 	$(INSTALL_M) $< $(dir $@)
 
 $(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(build_depsbindir)
@@ -382,6 +382,11 @@ endif
 	cp -R -L $(JULIAHOME)/base/* $(DESTDIR)$(datarootdir)/julia/base
 	cp -R -L $(JULIAHOME)/test/* $(DESTDIR)$(datarootdir)/julia/test
 	cp -R -L $(build_datarootdir)/julia/* $(DESTDIR)$(datarootdir)/julia
+
+	# Set .jl sources as read-only to match package directories
+	find $(DESTDIR)$(datarootdir)/julia/base -type f -name \*.jl -exec chmod 0444 '{}' \;
+	find $(DESTDIR)$(datarootdir)/julia/test -type f -name \*.jl -exec chmod 0444 '{}' \;
+
 	# Copy documentation
 	cp -R -L $(BUILDROOT)/doc/_build/html $(DESTDIR)$(docdir)/
 	# Remove various files which should not be installed
@@ -403,6 +408,10 @@ endif
 	# Install appdata file
 	mkdir -p $(DESTDIR)$(datarootdir)/metainfo/
 	$(INSTALL_F) $(JULIAHOME)/contrib/julia.appdata.xml $(DESTDIR)$(datarootdir)/metainfo/
+	# Install terminal info database
+ifneq ($(WITH_TERMINFO),0)
+	cp -R -L $(build_datarootdir)/julia/terminfo $(DESTDIR)$(datarootdir)/julia/
+endif
 
 	# Update RPATH entries and JL_SYSTEM_IMAGE_PATH if $(private_libdir_rel) != $(build_private_libdir_rel)
 ifneq ($(private_libdir_rel),$(build_private_libdir_rel))
@@ -639,9 +648,6 @@ testall: check-whitespace $(JULIA_BUILD_MODE)
 testall1: check-whitespace $(JULIA_BUILD_MODE)
 	@env JULIA_CPU_THREADS=1 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
 
-testall3: check-whitespace $(JULIA_BUILD_MODE)
-	@env JULIA_CPU_THREADS=3 $(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/test all JULIA_BUILD_MODE=$(JULIA_BUILD_MODE)
-
 test-%: check-whitespace $(JULIA_BUILD_MODE) .FORCE
 	@([ $$(( $$(date +%s) - $$(date -r $(build_private_libdir)/sys.$(SHLIB_EXT) +%s) )) -le 100 ] && \
 		printf '\033[93m    HINT The system image was recently rebuilt. Are you aware of the test-revise-* targets? See CONTRIBUTING.md. \033[0m\n') || true
diff --git a/NEWS.md b/NEWS.md
index a29fe2d43ab39..9aebf5d42d954 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,8 @@ Julia v1.12 Release Notes
 New language features
 ---------------------
 
+- New option `--trim` for building "trimmed" binaries, where code not provably reachable from entry points
+  is removed. Entry points can be marked using `Base.Experimental.entrypoint` ([#55047]).
 - A new keyword argument `usings::Bool` has been added to `names`. By using this, we can now
   find all the names available in module `A` by `names(A; all=true, imported=true, usings=true)`. ([#54609])
 - the `@atomic(...)` macro family supports now the reference assignment syntax, e.g.
@@ -35,6 +37,10 @@ Language changes
    expression within a given `:toplevel` expression to make use of macros
    defined earlier in the same `:toplevel` expression. ([#53515])
 
+ - Trivial infinite loops (like `while true; end`) are no longer undefined
+   behavior. Infinite loops that actually do things (e.g. have side effects
+   or sleep) were never and are still not undefined behavior. ([#52999])
+
 Compiler/Runtime improvements
 -----------------------------
 
@@ -56,6 +62,8 @@ variables. ([#53742]).
 * `--project=@temp` starts Julia with a temporary environment.
 * New `--trace-compile-timing` option to report how long each method reported by `--trace-compile` took
   to compile, in ms. ([#54662])
+* `--trace-compile` now prints recompiled methods in yellow or with a trailing comment if color is not supported ([#55763])
+* New `--trace-dispatch` option to report methods that are dynamically dispatched ([#55848]).
 
 Multi-threading changes
 -----------------------
@@ -63,6 +71,8 @@ Multi-threading changes
 Build system changes
 --------------------
 
+* There are new `Makefile`s to build Julia and LLVM using the Binary Optimization and Layout Tool (BOLT), see  `contrib/bolt` and `contrib/pgo-lto-bolt` ([#54107]).
+
 New library functions
 ---------------------
 
@@ -70,6 +80,8 @@ New library functions
 * The new `isfull(c::Channel)` function can be used to check if `put!(c, some_value)` will block. ([#53159])
 * `waitany(tasks; throw=false)` and `waitall(tasks; failfast=false, throw=false)` which wait multiple tasks at once ([#53341]).
 * `uuid7()` creates an RFC 9652 compliant UUID with version 7 ([#54834]).
+* `insertdims(array; dims)` allows to insert singleton dimensions into an array which is the inverse operation to `dropdims`
+* The new `Fix` type is a generalization of `Fix1/Fix2` for fixing a single argument ([#54653]).
 
 New library features
 --------------------
@@ -90,20 +102,30 @@ New library features
   data-races. Or use the callback form of `open` to have all that handled
   automatically.
 * `@timed` now additionally returns the elapsed compilation and recompilation time ([#52889])
+* `escape_string` takes additional keyword arguments `ascii=true` (to escape all
+  non-ASCII characters) and `fullhex=true` (to require full 4/8-digit hex numbers
+  for u/U escapes, e.g. for C compatibility) [#55099]).
 * `filter` can now act on a `NamedTuple` ([#50795]).
 * `tempname` can now take a suffix string to allow the file name to include a suffix and include that suffix in
   the uniquing checking ([#53474])
 * `RegexMatch` objects can now be used to construct `NamedTuple`s and `Dict`s ([#50988])
+* `Lockable` is now exported ([#54595])
+* New `ltruncate`, `rtruncate` and `ctruncate` functions for truncating strings to text width, accounting for char widths ([#55351])
 
 Standard library changes
 ------------------------
 
 * `gcdx(0, 0)` now returns `(0, 0, 0)` instead of `(0, 1, 0)` ([#40989]).
+* `fd` returns a `RawFD` instead of an `Int` ([#55080]).
 
 #### StyledStrings
 
 #### JuliaSyntaxHighlighting
 
+* A new standard library for applying syntax highlighting to Julia code, this
+  uses `JuliaSyntax` and `StyledStrings` to implement a `highlight` function
+  that creates an `AnnotatedString` with syntax highlighting applied.
+
 #### Package Manager
 
 #### LinearAlgebra
@@ -114,6 +136,10 @@ Standard library changes
   between different eigendecomposition algorithms ([#49355]).
 * Added a generic version of the (unblocked) pivoted Cholesky decomposition
   (callable via `cholesky[!](A, RowMaximum())`) ([#54619]).
+* The number of default BLAS threads now respects process affinity, instead of
+  using total number of logical threads available on the system ([#55574]).
+* A new function `zeroslike` is added that is used to generate the zero elements for matrix-valued banded matrices.
+  Custom array types may specialize this function to return an appropriate result. ([#55252])
 
 #### Logging
 
@@ -121,6 +147,13 @@ Standard library changes
 
 #### Profile
 
+* `Profile.take_heap_snapshot` takes a new keyword argument, `redact_data::Bool`,
+  that is `true` by default. When set, the contents of Julia objects are not emitted
+  in the heap snapshot. This currently only applies to strings. ([#55326])
+* `Profile.print()` now colors Base/Core/Package modules similarly to how they are in stacktraces.
+  Also paths, even if truncated, are now clickable in terminals that support URI links
+  to take you to the specified `JULIA_EDITOR` for the given file & line number. ([#55335])
+
 #### Random
 
 #### REPL
@@ -151,12 +184,20 @@ Standard library changes
 
 #### InteractiveUtils
 
+* New macros `@trace_compile` and `@trace_dispatch` for running an expression with
+  `--trace-compile=stderr --trace-compile-timing` and `--trace-dispatch=stderr` respectively enabled.
+  ([#55915])
+
 Deprecated or removed
 ---------------------
 
 External dependencies
 ---------------------
 
+- The terminal info database, `terminfo`, is now vendored by default, providing a better
+  REPL user experience when `terminfo` is not available on the system. Julia can be built
+  without vendoring the database using the Makefile option `WITH_TERMINFO=0`. ([#55411])
+
 Tooling Improvements
 --------------------
 
diff --git a/README.md b/README.md
index 535d969cb6662..465adcf049922 100644
--- a/README.md
+++ b/README.md
@@ -57,10 +57,8 @@ New developers may find the notes in
 [CONTRIBUTING](https://github.com/JuliaLang/julia/blob/master/CONTRIBUTING.md)
 helpful to start contributing to the Julia codebase.
 
-### External Resources
+### Learning Julia
 
-- [**StackOverflow**](https://stackoverflow.com/questions/tagged/julia-lang)
-- [**Twitter**](https://twitter.com/JuliaLanguage)
 - [**Learning resources**](https://julialang.org/learning/)
 
 ## Binary Installation
@@ -94,7 +92,7 @@ and then use the command prompt to change into the resulting julia directory. By
 Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases)
 of Julia. You can get this version by running:
 
-    git checkout v1.10.3
+    git checkout v1.10.5
 
 To build the `julia` executable, run `make` from within the julia directory.
 
diff --git a/base/Base.jl b/base/Base.jl
index e92fa9efbd387..84e10ca788ba2 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -53,6 +53,9 @@ function setproperty!(x, f::Symbol, v)
     return setfield!(x, f, val)
 end
 
+typeof(function getproperty end).name.constprop_heuristic = Core.FORCE_CONST_PROP
+typeof(function setproperty! end).name.constprop_heuristic = Core.FORCE_CONST_PROP
+
 dotgetproperty(x, f) = getproperty(x, f)
 
 getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order))
@@ -224,7 +227,7 @@ delete_method(which(Pair{Any,Any}, (Any, Any)))
 end
 
 # The REPL stdlib hooks into Base using this Ref
-const REPL_MODULE_REF = Ref{Module}()
+const REPL_MODULE_REF = Ref{Module}(Base)
 
 include("checked.jl")
 using .Checked
@@ -303,7 +306,6 @@ end
 include("hashing.jl")
 include("rounding.jl")
 include("div.jl")
-include("rawbigints.jl")
 include("float.jl")
 include("twiceprecision.jl")
 include("complex.jl")
@@ -421,7 +423,6 @@ include("weakkeydict.jl")
 
 # ScopedValues
 include("scopedvalues.jl")
-using .ScopedValues
 
 # metaprogramming
 include("meta.jl")
@@ -614,6 +615,25 @@ function profile_printing_listener(cond::Base.AsyncCondition)
     nothing
 end
 
+function start_profile_listener()
+    cond = Base.AsyncCondition()
+    Base.uv_unref(cond.handle)
+    t = errormonitor(Threads.@spawn(profile_printing_listener(cond)))
+    atexit() do
+        # destroy this callback when exiting
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
+        # this will prompt any ongoing or pending event to flush also
+        close(cond)
+        # error-propagation is not needed, since the errormonitor will handle printing that better
+        t === current_task() || _wait(t)
+    end
+    finalizer(cond) do c
+        # if something goes south, still make sure we aren't keeping a reference in C to this
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
+    end
+    ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle)
+end
+
 function __init__()
     # Base library init
     global _atexit_hooks_finished = false
@@ -626,9 +646,9 @@ function __init__()
     init_active_project()
     append!(empty!(_sysimage_modules), keys(loaded_modules))
     empty!(explicit_loaded_modules)
-    @assert isempty(loaded_precompiles)
+    empty!(loaded_precompiles) # If we load a packageimage when building the image this might not be empty
     for (mod, key) in module_keys
-        loaded_precompiles[key => module_build_id(mod)] = mod
+        push!(get!(Vector{Module}, loaded_precompiles, key), mod)
     end
     if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES")
         MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"])
@@ -636,22 +656,7 @@ function __init__()
     # Profiling helper
     @static if !Sys.iswindows()
         # triggering a profile via signals is not implemented on windows
-        cond = Base.AsyncCondition()
-        Base.uv_unref(cond.handle)
-        t = errormonitor(Threads.@spawn(profile_printing_listener(cond)))
-        atexit() do
-            # destroy this callback when exiting
-            ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
-            # this will prompt any ongoing or pending event to flush also
-            close(cond)
-            # error-propagation is not needed, since the errormonitor will handle printing that better
-            _wait(t)
-        end
-        finalizer(cond) do c
-            # if something goes south, still make sure we aren't keeping a reference in C to this
-            ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), C_NULL)
-        end
-        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), cond.handle)
+        start_profile_listener()
     end
     _require_world_age[] = get_world_counter()
     # Prevent spawned Julia process from getting stuck waiting on Tracy to connect.
diff --git a/base/Enums.jl b/base/Enums.jl
index 6e9efd8ccde38..d4094945853ec 100644
--- a/base/Enums.jl
+++ b/base/Enums.jl
@@ -44,7 +44,7 @@ Base.print(io::IO, x::Enum) = print(io, _symbol(x))
 function Base.show(io::IO, x::Enum)
     sym = _symbol(x)
     if !(get(io, :compact, false)::Bool)
-        from = get(io, :module, Base.active_module())
+        from = get(io, :module, Main)
         def = parentmodule(typeof(x))
         if from === nothing || !Base.isvisible(sym, def, from)
             show(io, def)
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index a9adae86b9ef2..e877a87c2cdd1 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -1101,11 +1101,8 @@ function copyto_unaliased!(deststyle::IndexStyle, dest::AbstractArray, srcstyle:
             end
         else
             # Dual-iterator implementation
-            ret = iterate(iterdest)
-            @inbounds for a in src
-                idx, state = ret::NTuple{2,Any}
-                dest[idx] = a
-                ret = iterate(iterdest, state)
+            for (Idest, Isrc) in zip(iterdest, itersrc)
+                @inbounds dest[Idest] = src[Isrc]
             end
         end
     end
@@ -1264,6 +1261,10 @@ function _memory_offset(x::AbstractArray, I::Vararg{Any,N}) where {N}
     return sum(map((i, s, o)->s*(i-o), J, strides(x), Tuple(first(CartesianIndices(x)))))*elsize(x)
 end
 
+## Special constprop heuristics for getindex/setindex
+typename(typeof(function getindex end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC
+typename(typeof(function setindex! end)).constprop_heuristic = Core.ARRAY_INDEX_HEURISTIC
+
 ## Approach:
 # We only define one fallback method on getindex for all argument types.
 # That dispatches to an (inlined) internal _getindex function, where the goal is
@@ -1659,10 +1660,10 @@ typed_vcat(::Type{T}) where {T} = Vector{T}()
 typed_hcat(::Type{T}) where {T} = Vector{T}()
 
 ## cat: special cases
-vcat(X::T...) where {T}         = T[ X[i] for i=1:length(X) ]
-vcat(X::T...) where {T<:Number} = T[ X[i] for i=1:length(X) ]
-hcat(X::T...) where {T}         = T[ X[j] for i=1:1, j=1:length(X) ]
-hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=1:length(X) ]
+vcat(X::T...) where {T}         = T[ X[i] for i=eachindex(X) ]
+vcat(X::T...) where {T<:Number} = T[ X[i] for i=eachindex(X) ]
+hcat(X::T...) where {T}         = T[ X[j] for i=1:1, j=eachindex(X) ]
+hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=eachindex(X) ]
 
 vcat(X::Number...) = hvcat_fill!(Vector{promote_typeof(X...)}(undef, length(X)), X)
 hcat(X::Number...) = hvcat_fill!(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X)
@@ -1917,7 +1918,7 @@ julia> vcat(range(1, 2, length=3))  # collects lazy ranges
  2.0
 
 julia> two = ([10, 20, 30]', Float64[4 5 6; 7 8 9])  # row vector and a matrix
-([10 20 30], [4.0 5.0 6.0; 7.0 8.0 9.0])
+(adjoint([10, 20, 30]), [4.0 5.0 6.0; 7.0 8.0 9.0])
 
 julia> vcat(two...)
 3×3 Matrix{Float64}:
@@ -3007,7 +3008,7 @@ end
 @inline function _stack_size_check(x, ax1::Tuple)
     if _iterator_axes(x) != ax1
         uax1 = map(UnitRange, ax1)
-        uaxN = map(UnitRange, axes(x))
+        uaxN = map(UnitRange, _iterator_axes(x))
         throw(DimensionMismatch(
             LazyString("stack expects uniform slices, got axes(x) == ", uaxN, " while first had ", uax1)))
     end
@@ -3404,6 +3405,8 @@ mapany(f, itr) = Any[f(x) for x in itr]
 Transform collection `c` by applying `f` to each element. For multiple collection arguments,
 apply `f` elementwise, and stop when any of them is exhausted.
 
+The element type of the result is determined in the same manner as in [`collect`](@ref).
+
 See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref).
 
 # Examples
@@ -3519,6 +3522,36 @@ julia> map(+, [1 2; 3 4], [1,10,100,1000], zeros(3,1))  # iterates until 3rd is
 """
 map(f, it, iters...) = collect(Generator(f, it, iters...))
 
+# Generic versions of push! for AbstractVector
+# These are specialized further for Vector for faster resizing and setindexing
+function push!(a::AbstractVector{T}, item) where T
+    # convert first so we don't grow the array if the assignment won't work
+    itemT = item isa T ? item : convert(T, item)::T
+    new_length = length(a) + 1
+    resize!(a, new_length)
+    a[end] = itemT
+    return a
+end
+
+# specialize and optimize the single argument case
+function push!(a::AbstractVector{Any}, @nospecialize x)
+    new_length = length(a) + 1
+    resize!(a, new_length)
+    a[end] = x
+    return a
+end
+function push!(a::AbstractVector{Any}, @nospecialize x...)
+    @_terminates_locally_meta
+    na = length(a)
+    nx = length(x)
+    resize!(a, na + nx)
+    e = lastindex(a) - nx
+    for i = 1:nx
+        a[e+i] = x[i]
+    end
+    return a
+end
+
 # multi-item push!, pushfirst! (built on top of type-specific 1-item version)
 # (note: must not cause a dispatch loop when 1-item case is not defined)
 push!(A, a, b) = push!(push!(A, a), b)
@@ -3526,6 +3559,9 @@ push!(A, a, b, c...) = push!(push!(A, a, b), c...)
 pushfirst!(A, a, b) = pushfirst!(pushfirst!(A, b), a)
 pushfirst!(A, a, b, c...) = pushfirst!(pushfirst!(A, c...), a, b)
 
+# sizehint! does not nothing by default
+sizehint!(a::AbstractVector, _) = a
+
 ## hashing AbstractArray ##
 
 const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5
diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl
index a9efc2b87bee4..0f028a0f66729 100644
--- a/base/abstractarraymath.jl
+++ b/base/abstractarraymath.jl
@@ -93,6 +93,70 @@ function _dropdims(A::AbstractArray, dims::Dims)
 end
 _dropdims(A::AbstractArray, dim::Integer) = _dropdims(A, (Int(dim),))
 
+
+"""
+    insertdims(A; dims)
+
+Inverse of [`dropdims`](@ref); return an array with new singleton dimensions
+at every dimension in `dims`.
+
+Repeated dimensions are forbidden and the largest entry in `dims` must be
+less than or equal than `ndims(A) + length(dims)`.
+
+The result shares the same underlying data as `A`, such that the
+result is mutable if and only if `A` is mutable, and setting elements of one
+alters the values of the other.
+
+See also: [`dropdims`](@ref), [`reshape`](@ref), [`vec`](@ref).
+# Examples
+```jldoctest
+julia> x = [1 2 3; 4 5 6]
+2×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+
+julia> insertdims(x, dims=3)
+2×3×1 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2  3
+ 4  5  6
+
+julia> insertdims(x, dims=(1,2,5)) == reshape(x, 1, 1, 2, 3, 1)
+true
+
+julia> dropdims(insertdims(x, dims=(1,2,5)), dims=(1,2,5))
+2×3 Matrix{Int64}:
+ 1  2  3
+ 4  5  6
+```
+
+!!! compat "Julia 1.12"
+    Requires Julia 1.12 or later.
+"""
+insertdims(A; dims) = _insertdims(A, dims)
+function _insertdims(A::AbstractArray{T, N}, dims::NTuple{M, Int}) where {T, N, M}
+    for i in eachindex(dims)
+        1 ≤ dims[i] || throw(ArgumentError("the smallest entry in dims must be ≥ 1."))
+        dims[i] ≤ N+M || throw(ArgumentError("the largest entry in dims must be not larger than the dimension of the array and the length of dims added"))
+        for j = 1:i-1
+            dims[j] == dims[i] && throw(ArgumentError("inserted dims must be unique"))
+        end
+    end
+
+    # acc is a tuple, where the first entry is the final shape
+    # the second entry off acc is a counter for the axes of A
+    inds= Base._foldoneto((acc, i) ->
+                            i ∈ dims
+                                ? ((acc[1]..., Base.OneTo(1)), acc[2])
+                                : ((acc[1]..., axes(A, acc[2])), acc[2] + 1),
+                            ((), 1), Val(N+M))
+    new_shape = inds[1]
+    return reshape(A, new_shape)
+end
+_insertdims(A::AbstractArray, dim::Integer) = _insertdims(A, (Int(dim),))
+
+
+
 ## Unary operators ##
 
 """
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index 62a5b3ee9e1b0..85a726b4cdbf4 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -392,6 +392,10 @@ Dict{String, Float64} with 3 entries:
 
 julia> ans == mergewith(+)(a, b)
 true
+
+julia> mergewith(-, Dict(), Dict(:a=>1))  # Combining function only used if key is present in both
+Dict{Any, Any} with 1 entry:
+  :a => 1
 ```
 """
 mergewith(combine, d::AbstractDict, others::AbstractDict...) =
diff --git a/base/array.jl b/base/array.jl
index 32c543ff12638..5b3e6cc398479 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -415,7 +415,7 @@ function fill!(a::Union{Array{UInt8}, Array{Int8}}, x::Integer)
     ref = a.ref
     t = @_gc_preserve_begin ref
     p = unsafe_convert(Ptr{Cvoid}, ref)
-    memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a))
+    memset(p, x isa eltype(a) ? x : convert(eltype(a), x), length(a) % UInt)
     @_gc_preserve_end t
     return a
 end
@@ -660,7 +660,7 @@ _array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(it
 
 
 """
-    collect(collection)
+    collect(iterator)
 
 Return an `Array` of all items in a collection or iterator. For dictionaries, returns
 a `Vector` of `key=>value` [Pair](@ref Pair)s. If the argument is array-like or is an iterator
@@ -671,6 +671,9 @@ Used by [comprehensions](@ref man-comprehensions) to turn a [generator expressio
 into an `Array`. Thus, *on generators*, the square-brackets notation may be used instead of calling `collect`,
 see second example.
 
+The element type of the returned array is based on the types of the values collected. However, if the
+iterator is empty then the element type of the returned (empty) array is determined by type inference.
+
 # Examples
 
 Collect items from a `UnitRange{Int64}` collection:
@@ -692,6 +695,21 @@ julia> collect(x^2 for x in 1:3)
  4
  9
 ```
+
+Collecting an empty iterator where the result type depends on type inference:
+
+```jldoctest
+julia> [rand(Bool) ? 1 : missing for _ in []]
+Union{Missing, Int64}[]
+```
+
+When the iterator is non-empty, the result type depends only on values:
+
+```julia-repl
+julia> [rand(Bool) ? 1 : missing for _ in [""]]
+1-element Vector{Int64}:
+ 1
+```
 """
 collect(itr) = _collect(1:1 #= Array =#, itr, IteratorEltype(itr), IteratorSize(itr))
 
@@ -3064,3 +3082,56 @@ intersect(r::AbstractRange, v::AbstractVector) = intersect(v, r)
         _getindex(v, i)
     end
 end
+
+"""
+    wrap(Array, m::Union{Memory{T}, MemoryRef{T}}, dims)
+
+Create an array of size `dims` using `m` as the underlying memory. This can be thought of as a safe version
+of [`unsafe_wrap`](@ref) utilizing `Memory` or `MemoryRef` instead of raw pointers.
+"""
+function wrap end
+
+# validity checking for _wrap calls, separate from allocation of Array so that it can be more likely to inline into the caller
+function _wrap(ref::MemoryRef{T}, dims::NTuple{N, Int}) where {T, N}
+    mem = ref.mem
+    mem_len = length(mem) + 1 - memoryrefoffset(ref)
+    len = Core.checked_dims(dims...)
+    @boundscheck mem_len >= len || invalid_wrap_err(mem_len, dims, len)
+    if N != 1 && !(ref === GenericMemoryRef(mem) && len === mem_len)
+        mem = ccall(:jl_genericmemory_slice, Memory{T}, (Any, Ptr{Cvoid}, Int), mem, ref.ptr_or_offset, len)
+        ref = memoryref(mem)
+    end
+    return ref
+end
+
+@noinline invalid_wrap_err(len, dims, proddims) = throw(DimensionMismatch(LazyString(
+    "Attempted to wrap a MemoryRef of length ", len, " with an Array of size dims=", dims,
+    " which is invalid because prod(dims) = ", proddims, " > ", len,
+    " so that the array would have more elements than the underlying memory can store.")))
+
+@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, dims::NTuple{N, Integer}) where {T, N}
+    dims = convert(Dims, dims)
+    ref = _wrap(m, dims)
+    $(Expr(:new, :(Array{T, N}), :ref, :dims))
+end
+
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, dims::NTuple{N, Integer}) where {T, N}
+    dims = convert(Dims, dims)
+    ref = _wrap(memoryref(m), dims)
+    $(Expr(:new, :(Array{T, N}), :ref, :dims))
+end
+@eval @propagate_inbounds function wrap(::Type{Array}, m::MemoryRef{T}, l::Integer) where {T}
+    dims = (Int(l),)
+    ref = _wrap(m, dims)
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
+end
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}, l::Integer) where {T}
+    dims = (Int(l),)
+    ref = _wrap(memoryref(m), (l,))
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
+end
+@eval @propagate_inbounds function wrap(::Type{Array}, m::Memory{T}) where {T}
+    ref = memoryref(m)
+    dims = (length(m),)
+    $(Expr(:new, :(Array{T, 1}), :ref, :dims))
+end
diff --git a/base/asyncevent.jl b/base/asyncevent.jl
index 3c782be10e194..c6cb3d3fa73bb 100644
--- a/base/asyncevent.jl
+++ b/base/asyncevent.jl
@@ -127,8 +127,11 @@ function _trywait(t::Union{Timer, AsyncCondition})
         t isa Timer || Core.Intrinsics.atomic_fence(:acquire_release)
     else
         if !isopen(t)
-            close(t) # wait for the close to complete
-            return false
+            set = t.set
+            if !set
+                close(t) # wait for the close to complete
+                return false
+            end
         end
         iolock_begin()
         set = t.set
@@ -151,7 +154,7 @@ function _trywait(t::Union{Timer, AsyncCondition})
         end
         iolock_end()
     end
-    @atomic :monotonic t.set = false
+    @atomic :monotonic t.set = false # if there are multiple waiters, an unspecified number may short-circuit past here
     return set
 end
 
@@ -161,14 +164,14 @@ function wait(t::Union{Timer, AsyncCondition})
 end
 
 
-isopen(t::Union{Timer, AsyncCondition}) = t.isopen && t.handle != C_NULL
+isopen(t::Union{Timer, AsyncCondition}) = @atomic :acquire t.isopen
 
 function close(t::Union{Timer, AsyncCondition})
-    t.handle == C_NULL && return # short-circuit path
+    t.handle == C_NULL && !t.isopen && return # short-circuit path, :monotonic
     iolock_begin()
     if t.handle != C_NULL
         if t.isopen
-            @atomic :monotonic t.isopen = false
+            @atomic :release t.isopen = false
             ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
         end
         # implement _trywait here without the auto-reset function, just waiting for the final close signal
@@ -186,6 +189,8 @@ function close(t::Union{Timer, AsyncCondition})
             unlock(t.cond)
             unpreserve_handle(t)
         end
+    elseif t.isopen
+        @atomic :release t.isopen = false
     end
     iolock_end()
     nothing
@@ -198,8 +203,8 @@ function uvfinalize(t::Union{Timer, AsyncCondition})
         if t.handle != C_NULL
             disassociate_julia_struct(t.handle) # not going to call the usual close hooks anymore
             if t.isopen
-                @atomic :monotonic t.isopen = false
-                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
+                @atomic :release t.isopen = false
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle) # this will call Libc.free
             end
             @atomic :monotonic t.handle = C_NULL
             notify(t.cond, false)
@@ -214,8 +219,10 @@ end
 function _uv_hook_close(t::Union{Timer, AsyncCondition})
     lock(t.cond)
     try
-        @atomic :monotonic t.isopen = false
-        Libc.free(@atomicswap :monotonic t.handle = C_NULL)
+        handle = t.handle
+        @atomic :release t.isopen = false
+        @atomic :monotonic t.handle = C_NULL
+        Libc.free(handle)
         notify(t.cond, false)
     finally
         unlock(t.cond)
@@ -243,7 +250,7 @@ function uv_timercb(handle::Ptr{Cvoid})
         if ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 0
             # timer is stopped now
             if t.isopen
-                @atomic :monotonic t.isopen = false
+                @atomic :release t.isopen = false
                 ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
             end
         end
diff --git a/base/asyncmap.jl b/base/asyncmap.jl
index c81afbb7e9115..02e515d2e0c6c 100644
--- a/base/asyncmap.jl
+++ b/base/asyncmap.jl
@@ -9,6 +9,8 @@ Uses multiple concurrent tasks to map `f` over a collection (or multiple
 equal length collections). For multiple collection arguments, `f` is
 applied elementwise.
 
+The output is guaranteed to be the same order as the elements of the collection(s) `c`.
+
 `ntasks` specifies the number of tasks to run concurrently.
 Depending on the length of the collections, if `ntasks` is unspecified,
 up to 100 tasks will be used for concurrent mapping.
diff --git a/base/boot.jl b/base/boot.jl
index 1481928b319b7..608e273d4b514 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -284,7 +284,8 @@ macro _foldable_meta()
         #=:inaccessiblememonly=#true,
         #=:noub=#true,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
 end
 
 macro inline()   Expr(:meta, :inline)   end
@@ -1012,6 +1013,35 @@ const check_top_bit = check_sign_bit
 EnterNode(old::EnterNode, new_dest::Int) = isdefined(old, :scope) ?
     EnterNode(new_dest, old.scope) : EnterNode(new_dest)
 
+# typename(_).constprop_heuristic
+const FORCE_CONST_PROP      = 0x1
+const ARRAY_INDEX_HEURISTIC = 0x2
+const ITERATE_HEURISTIC     = 0x3
+const SAMETYPE_HEURISTIC    = 0x4
+
+# `typename` has special tfunc support in inference to improve
+# the result for `Type{Union{...}}`. It is defined here, so that the Compiler
+# can look it up by value.
+struct TypeNameError <: Exception
+    a
+    TypeNameError(@nospecialize(a)) = new(a)
+end
+
+typename(a) = throw(TypeNameError(a))
+typename(a::DataType) = a.name
+function typename(a::Union)
+    ta = typename(a.a)
+    tb = typename(a.b)
+    ta === tb || throw(TypeNameError(a))
+    return tb
+end
+typename(union::UnionAll) = typename(union.body)
+
+# Special inference support to avoid execess specialization of these methods.
+# TODO: Replace this by a generic heuristic.
+(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a)
+(!==)(@nospecialize(a), @nospecialize(b)) = Intrinsics.not_int(a === b)
+
 include(Core, "optimized_generics.jl")
 
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index 57eac7f3a094c..927c946e53e02 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -751,6 +751,7 @@ The resulting container type is established by the following rules:
  - All other combinations of arguments default to returning an `Array`, but
    custom container types can define their own implementation and promotion-like
    rules to customize the result when they appear as arguments.
+ - The element type is determined in the same manner as in [`collect`](@ref).
 
 A special syntax exists for broadcasting: `f.(args...)` is equivalent to
 `broadcast(f, args...)`, and nested `f.(g.(args...))` calls are fused into a
diff --git a/base/char.jl b/base/char.jl
index bc68a672ce0ca..2e8410f6903e2 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -223,6 +223,7 @@ hash(x::Char, h::UInt) =
     hash_uint64(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
 
 first_utf8_byte(c::Char) = (bitcast(UInt32, c) >> 24) % UInt8
+first_utf8_byte(c::AbstractChar) = first_utf8_byte(Char(c)::Char)
 
 # fallbacks:
 isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
diff --git a/base/client.jl b/base/client.jl
index f6b83ecd0f4a0..a04556507d5dc 100644
--- a/base/client.jl
+++ b/base/client.jl
@@ -41,7 +41,6 @@ function repl_cmd(cmd, out)
     if isempty(cmd.exec)
         throw(ArgumentError("no cmd to execute"))
     elseif cmd.exec[1] == "cd"
-        new_oldpwd = pwd()
         if length(cmd.exec) > 2
             throw(ArgumentError("cd method only takes one argument"))
         elseif length(cmd.exec) == 2
@@ -52,11 +51,17 @@ function repl_cmd(cmd, out)
                 end
                 dir = ENV["OLDPWD"]
             end
-            cd(dir)
         else
-            cd()
+            dir = homedir()
         end
-        ENV["OLDPWD"] = new_oldpwd
+        try
+            ENV["OLDPWD"] = pwd()
+        catch ex
+            ex isa IOError || rethrow()
+            # if current dir has been deleted, then pwd() will throw an IOError: pwd(): no such file or directory (ENOENT)
+            delete!(ENV, "OLDPWD")
+        end
+        cd(dir)
         println(out, pwd())
     else
         @static if !Sys.iswindows()
@@ -292,12 +297,12 @@ function exec_options(opts)
             invokelatest(show, Core.eval(Main, parse_input_line(arg)))
             println()
         elseif cmd == 'm'
-            @eval Main import $(Symbol(arg)).main
+            entrypoint = push!(split(arg, "."), "main")
+            Base.eval(Main, Expr(:import, Expr(:., Symbol.(entrypoint)...)))
             if !should_use_main_entrypoint()
                 error("`main` in `$arg` not declared as entry point (use `@main` to do so)")
             end
             return false
-
         elseif cmd == 'L'
             # load file immediately on all processors
             if !distributed_mode
@@ -339,11 +344,13 @@ function _global_julia_startup_file()
     # If it is not found, then continue on to the relative path based on Sys.BINDIR
     BINDIR = Sys.BINDIR
     SYSCONFDIR = Base.SYSCONFDIR
+    p1 = nothing
     if !isempty(SYSCONFDIR)
         p1 = abspath(BINDIR, SYSCONFDIR, "julia", "startup.jl")
         isfile(p1) && return p1
     end
     p2 = abspath(BINDIR, "..", "etc", "julia", "startup.jl")
+    p1 == p2 && return nothing # don't check the same path twice
     isfile(p2) && return p2
     return nothing
 end
@@ -415,79 +422,98 @@ function load_REPL()
     return nothing
 end
 
-global active_repl
+global active_repl::Any
+global active_repl_backend = nothing
+
+function run_fallback_repl(interactive::Bool)
+    let input = stdin
+        if isa(input, File) || isa(input, IOStream)
+            # for files, we can slurp in the whole thing at once
+            ex = parse_input_line(read(input, String))
+            if Meta.isexpr(ex, :toplevel)
+                # if we get back a list of statements, eval them sequentially
+                # as if we had parsed them sequentially
+                for stmt in ex.args
+                    eval_user_input(stderr, stmt, true)
+                end
+                body = ex.args
+            else
+                eval_user_input(stderr, ex, true)
+            end
+        else
+            while !eof(input)
+                if interactive
+                    print("julia> ")
+                    flush(stdout)
+                end
+                try
+                    line = ""
+                    ex = nothing
+                    while !eof(input)
+                        line *= readline(input, keep=true)
+                        ex = parse_input_line(line)
+                        if !(isa(ex, Expr) && ex.head === :incomplete)
+                            break
+                        end
+                    end
+                    eval_user_input(stderr, ex, true)
+                catch err
+                    isa(err, InterruptException) ? print("\n\n") : rethrow()
+                end
+            end
+        end
+    end
+    nothing
+end
+
+function run_std_repl(REPL::Module, quiet::Bool, banner::Symbol, history_file::Bool)
+    term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
+    term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr)
+    banner == :no || REPL.banner(term, short=banner==:short)
+    if term.term_type == "dumb"
+        repl = REPL.BasicREPL(term)
+        quiet || @warn "Terminal not fully functional"
+    else
+        repl = REPL.LineEditREPL(term, get(stdout, :color, false), true)
+        repl.history_file = history_file
+    end
+    # Make sure any displays pushed in .julia/config/startup.jl ends up above the
+    # REPLDisplay
+    d = REPL.REPLDisplay(repl)
+    last_active_repl = @isdefined(active_repl) ? active_repl : nothing
+    last_active_repl_backend = active_repl_backend
+    global active_repl = repl
+    pushdisplay(d)
+    try
+        global active_repl = repl
+        _atreplinit(repl)
+        REPL.run_repl(repl, backend->(global active_repl_backend = backend))
+    finally
+        popdisplay(d)
+        active_repl = last_active_repl
+        active_repl_backend = last_active_repl_backend
+    end
+    nothing
+end
 
 # run the requested sort of evaluation loop on stdio
 function run_main_repl(interactive::Bool, quiet::Bool, banner::Symbol, history_file::Bool)
     fallback_repl = parse(Bool, get(ENV, "JULIA_FALLBACK_REPL", "false"))
     if !fallback_repl && interactive
         load_InteractiveUtils()
-        if !isassigned(REPL_MODULE_REF)
+        REPL = REPL_MODULE_REF[]
+        if REPL === Base
             load_REPL()
         end
     end
-    # TODO cleanup REPL_MODULE_REF
-    if !fallback_repl && interactive && isassigned(REPL_MODULE_REF)
-        invokelatest(REPL_MODULE_REF[]) do REPL
-            term_env = get(ENV, "TERM", @static Sys.iswindows() ? "" : "dumb")
-            term = REPL.Terminals.TTYTerminal(term_env, stdin, stdout, stderr)
-            banner == :no || REPL.banner(term, short=banner==:short)
-            if term.term_type == "dumb"
-                repl = REPL.BasicREPL(term)
-                quiet || @warn "Terminal not fully functional"
-            else
-                repl = REPL.LineEditREPL(term, get(stdout, :color, false), true)
-                repl.history_file = history_file
-            end
-            global active_repl = repl
-            # Make sure any displays pushed in .julia/config/startup.jl ends up above the
-            # REPLDisplay
-            pushdisplay(REPL.REPLDisplay(repl))
-            _atreplinit(repl)
-            REPL.run_repl(repl, backend->(global active_repl_backend = backend))
-        end
+    REPL = REPL_MODULE_REF[]
+    if !fallback_repl && interactive && REPL !== Base
+        invokelatest(run_std_repl, REPL, quiet, banner, history_file)
     else
-        # otherwise provide a simple fallback
         if !fallback_repl && interactive && !quiet
             @warn "REPL provider not available: using basic fallback" LOAD_PATH=join(Base.LOAD_PATH, Sys.iswindows() ? ';' : ':')
         end
-        let input = stdin
-            if isa(input, File) || isa(input, IOStream)
-                # for files, we can slurp in the whole thing at once
-                ex = parse_input_line(read(input, String))
-                if Meta.isexpr(ex, :toplevel)
-                    # if we get back a list of statements, eval them sequentially
-                    # as if we had parsed them sequentially
-                    for stmt in ex.args
-                        eval_user_input(stderr, stmt, true)
-                    end
-                    body = ex.args
-                else
-                    eval_user_input(stderr, ex, true)
-                end
-            else
-                while !eof(input)
-                    if interactive
-                        print("julia> ")
-                        flush(stdout)
-                    end
-                    try
-                        line = ""
-                        ex = nothing
-                        while !eof(input)
-                            line *= readline(input, keep=true)
-                            ex = parse_input_line(line)
-                            if !(isa(ex, Expr) && ex.head === :incomplete)
-                                break
-                            end
-                        end
-                        eval_user_input(stderr, ex, true)
-                    catch err
-                        isa(err, InterruptException) ? print("\n\n") : rethrow()
-                    end
-                end
-            end
-        end
+        run_fallback_repl(interactive)
     end
     nothing
 end
@@ -613,10 +639,10 @@ module MyApp
 end
 const main = MyApp.main
 # `julia` Will *NOT* execute MyApp.main unless there is a separate `@main` annotation in `Main`
+```
 
 !!! compat "Julia 1.11"
     This macro is new in Julia 1.11. At present, the precise semantics of `@main` are still subject to change.
-```
 """
 macro main(args...)
     if !isempty(args)
diff --git a/base/cmd.jl b/base/cmd.jl
index 202527abdf644..84ec52f865e98 100644
--- a/base/cmd.jl
+++ b/base/cmd.jl
@@ -482,7 +482,7 @@ function cmd_gen(parsed)
     end
 end
 
-@assume_effects :effect_free :terminates_globally :noub function cmd_gen(
+@assume_effects :foldable !:consistent function cmd_gen(
     parsed::Tuple{Vararg{Tuple{Vararg{Union{String, SubString{String}}}}}}
 )
     return @invoke cmd_gen(parsed::Any)
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index 4c3286c7e2737..c8a25be422637 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -1,5 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+struct SlotRefinement
+    slot::SlotNumber
+    typ::Any
+    SlotRefinement(slot::SlotNumber, @nospecialize(typ)) = new(slot, typ)
+end
+
 # See if the inference result of the current statement's result value might affect
 # the final answer for the method (aside from optimization potential and exceptions).
 # To do that, we need to check both for slot assignment and SSA usage.
@@ -7,227 +13,244 @@ call_result_unused(sv::InferenceState, currpc::Int) =
     isexpr(sv.src.code[currpc], :call) && isempty(sv.ssavalue_uses[currpc])
 call_result_unused(si::StmtInfo) = !si.used
 
+is_const_bool_or_bottom(@nospecialize(b)) = (isa(b, Const) && isa(b.val, Bool)) || b == Bottom
+function can_propagate_conditional(@nospecialize(rt), argtypes::Vector{Any})
+    isa(rt, InterConditional) || return false
+    if rt.slot > length(argtypes)
+        # In the vararg tail - can't be conditional
+        @assert isvarargtype(argtypes[end])
+        return false
+    end
+    return isa(argtypes[rt.slot], Conditional) &&
+        is_const_bool_or_bottom(rt.thentype) && is_const_bool_or_bottom(rt.thentype)
+end
+
+function propagate_conditional(rt::InterConditional, cond::Conditional)
+    new_thentype = rt.thentype === Const(false) ? cond.elsetype : cond.thentype
+    new_elsetype = rt.elsetype === Const(true) ? cond.thentype : cond.elsetype
+    if rt.thentype == Bottom
+        @assert rt.elsetype != Bottom
+        return Conditional(cond.slot, Bottom, new_elsetype)
+    elseif rt.elsetype == Bottom
+        @assert rt.thentype != Bottom
+        return Conditional(cond.slot, new_thentype, Bottom)
+    end
+    return Conditional(cond.slot, new_thentype, new_elsetype)
+end
+
 function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
                                   arginfo::ArgInfo, si::StmtInfo, @nospecialize(atype),
                                   sv::AbsIntState, max_methods::Int)
     𝕃ₚ, 𝕃ᵢ = ipo_lattice(interp), typeinf_lattice(interp)
-    ⊑ₚ = ⊑(𝕃ₚ)
-    if !should_infer_this_call(interp, sv)
-        add_remark!(interp, sv, "Skipped call in throw block")
-        # At this point we are guaranteed to end up throwing on this path,
-        # which is all that's required for :consistent-cy. Of course, we don't
-        # know anything else about this statement.
-        effects = Effects(; consistent=ALWAYS_TRUE)
-        return CallMeta(Any, Any, effects, NoCallInfo())
-    end
-
+    ⊑ₚ, ⋤ₚ, ⊔ₚ, ⊔ᵢ  = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ), join(𝕃ᵢ)
     argtypes = arginfo.argtypes
     matches = find_method_matches(interp, argtypes, atype; max_methods)
     if isa(matches, FailedMethodMatch)
         add_remark!(interp, sv, matches.reason)
-        return CallMeta(Any, Any, Effects(), NoCallInfo())
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     end
 
     (; valid_worlds, applicable, info) = matches
     update_valid_age!(sv, valid_worlds)
-    napplicable = length(applicable)
-    rettype = excttype = Bottom
+
+    # final result
+    gfresult = Future{CallMeta}()
+    # intermediate work for computing gfresult
+    rettype = exctype = Bottom
     edges = MethodInstance[]
     conditionals = nothing # keeps refinement information of call argument types when the return type is boolean
-    seen = 0               # number of signatures actually inferred
+    seenall = true
     const_results = nothing # or const_results::Vector{Union{Nothing,ConstResult}} if any const results are available
-    multiple_matches = napplicable > 1
     fargs = arginfo.fargs
     all_effects = EFFECTS_TOTAL
-
-    for i in 1:napplicable
-        match = applicable[i]::MethodMatch
-        method = match.method
-        sig = match.spec_types
-        if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
-            # only infer concrete call sites in top-level expressions
-            add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
-            break
-        end
-        this_rt = Bottom
-        this_exct = Bottom
-        splitunions = false
-        # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now
-        # sigtuple = unwrap_unionall(sig)::DataType
-        # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
-        if splitunions
-            splitsigs = switchtupleunion(sig)
-            for sig_n in splitsigs
-                result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)
-                (; rt, exct, edge, effects, volatile_inf_result) = result
+    slotrefinements = nothing # keeps refinement information on slot types obtained from call signature
+
+    # split the for loop off into a function, so that we can pause and restart it at will
+    i::Int = 1
+    f = Core.Box(f)
+    atype = Core.Box(atype)
+    function infercalls(interp, sv)
+        napplicable = length(applicable)
+        multiple_matches = napplicable > 1
+        while i <= napplicable
+            match = applicable[i]::MethodMatch
+            method = match.method
+            sig = match.spec_types
+            if bail_out_toplevel_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
+                # only infer concrete call sites in top-level expressions
+                add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
+                seenall = false
+                break
+            end
+            # TODO: this is unmaintained now as it didn't seem to improve things, though it does avoid hard-coding the union split at the higher level,
+            # it also can hurt infer-ability of some constrained parameter types (e.g. quacks like a duck)
+            # sigtuple = unwrap_unionall(sig)::DataType
+            # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).max_union_splitting
+            #if splitunions
+            #    splitsigs = switchtupleunion(sig)
+            #    for sig_n in splitsigs
+            #        result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, si, sv)::Future
+            #        handle1(...)
+            #    end
+            #end
+            mresult = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)::Future
+            function handle1(interp, sv)
+                local (; rt, exct, edge, effects, volatile_inf_result) = mresult[]
+                this_conditional = ignorelimited(rt)
+                this_rt = widenwrappedconditional(rt)
+                this_exct = exct
+                # try constant propagation with argtypes for this match
+                # this is in preparation for inlining, or improving the return result
                 this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
                 this_arginfo = ArgInfo(fargs, this_argtypes)
                 const_call_result = abstract_call_method_with_const_args(interp,
-                    result, f, this_arginfo, si, match, sv)
+                    mresult[], f.contents, this_arginfo, si, match, sv)
                 const_result = volatile_inf_result
                 if const_call_result !== nothing
-                    if const_call_result.rt ⊑ₚ rt
-                        rt = const_call_result.rt
+                    this_const_conditional = ignorelimited(const_call_result.rt)
+                    this_const_rt = widenwrappedconditional(const_call_result.rt)
+                    if this_const_rt ⊑ₚ this_rt
+                        # As long as the const-prop result we have is not *worse* than
+                        # what we found out on types, we'd like to use it. Even if the
+                        # end result is exactly equivalent, it is likely that the IR
+                        # we produced while constproping is better than that with
+                        # generic types.
+                        # Return type of const-prop' inference can be wider than that of non const-prop' inference
+                        # e.g. in cases when there are cycles but cached result is still accurate
+                        this_conditional = this_const_conditional
+                        this_rt = this_const_rt
                         (; effects, const_result, edge) = const_call_result
                     elseif is_better_effects(const_call_result.effects, effects)
                         (; effects, const_result, edge) = const_call_result
                     else
                         add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
                     end
-                    if !(exct ⊑ₚ const_call_result.exct)
-                        exct = const_call_result.exct
+                    # Treat the exception type separately. Currently, constprop often cannot determine the exception type
+                    # because consistent-cy does not apply to exceptions.
+                    if const_call_result.exct ⋤ this_exct
+                        this_exct = const_call_result.exct
                         (; const_result, edge) = const_call_result
                     else
                         add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
                     end
                 end
+
                 all_effects = merge_effects(all_effects, effects)
                 if const_result !== nothing
                     if const_results === nothing
-                        const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, #=TODO=#napplicable), nothing)
+                        const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing)
                     end
                     const_results[i] = const_result
                 end
                 edge === nothing || push!(edges, edge)
-                this_rt = tmerge(this_rt, rt)
-                this_exct = tmerge(this_exct, exct)
-                if bail_out_call(interp, this_rt, sv)
-                    break
+                @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
+                if can_propagate_conditional(this_conditional, argtypes)
+                    # The only case where we need to keep this in rt is where
+                    # we can directly propagate the conditional to a slot argument
+                    # that is not one of our arguments, otherwise we keep all the
+                    # relevant information in `conditionals` below.
+                    this_rt = this_conditional
                 end
-            end
-            this_conditional = ignorelimited(this_rt)
-            this_rt = widenwrappedconditional(this_rt)
-        else
-            result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, si, sv)
-            (; rt, exct, edge, effects, volatile_inf_result) = result
-            this_conditional = ignorelimited(rt)
-            this_rt = widenwrappedconditional(rt)
-            this_exct = exct
-            # try constant propagation with argtypes for this match
-            # this is in preparation for inlining, or improving the return result
-            this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
-            this_arginfo = ArgInfo(fargs, this_argtypes)
-            const_call_result = abstract_call_method_with_const_args(interp,
-                result, f, this_arginfo, si, match, sv)
-            const_result = volatile_inf_result
-            if const_call_result !== nothing
-                this_const_conditional = ignorelimited(const_call_result.rt)
-                this_const_rt = widenwrappedconditional(const_call_result.rt)
-                if this_const_rt ⊑ₚ this_rt
-                    # As long as the const-prop result we have is not *worse* than
-                    # what we found out on types, we'd like to use it. Even if the
-                    # end result is exactly equivalent, it is likely that the IR
-                    # we produced while constproping is better than that with
-                    # generic types.
-                    # Return type of const-prop' inference can be wider than that of non const-prop' inference
-                    # e.g. in cases when there are cycles but cached result is still accurate
-                    this_conditional = this_const_conditional
-                    this_rt = this_const_rt
-                    (; effects, const_result, edge) = const_call_result
-                elseif is_better_effects(const_call_result.effects, effects)
-                    (; effects, const_result, edge) = const_call_result
-                else
-                    add_remark!(interp, sv, "[constprop] Discarded because the result was wider than inference")
+
+                rettype = rettype ⊔ₚ this_rt
+                exctype = exctype ⊔ₚ this_exct
+                if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
+                    if conditionals === nothing
+                        conditionals = Any[Bottom for _ in 1:length(argtypes)],
+                                       Any[Bottom for _ in 1:length(argtypes)]
+                    end
+                    for i = 1:length(argtypes)
+                        cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i)
+                        conditionals[1][i] = conditionals[1][i] ⊔ᵢ cnd.thentype
+                        conditionals[2][i] = conditionals[2][i] ⊔ᵢ cnd.elsetype
+                    end
                 end
-                # Treat the exception type separately. Currently, constprop often cannot determine the exception type
-                # because consistent-cy does not apply to exceptions.
-                if !(this_exct ⊑ₚ const_call_result.exct)
-                    this_exct = const_call_result.exct
-                    (; const_result, edge) = const_call_result
-                else
-                    add_remark!(interp, sv, "[constprop] Discarded exception type because result was wider than inference")
+                if i < napplicable && bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
+                    add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
+                    seenall = false
+                    i = napplicable # break in outer function
                 end
+                i += 1
+                return true
             end
-            all_effects = merge_effects(all_effects, effects)
-            if const_result !== nothing
-                if const_results === nothing
-                    const_results = fill!(Vector{Union{Nothing,ConstResult}}(undef, napplicable), nothing)
-                end
-                const_results[i] = const_result
+            if isready(mresult) && handle1(interp, sv)
+                continue
+            else
+                push!(sv.tasks, handle1)
+                return false
             end
-            edge === nothing || push!(edges, edge)
+        end # while
+
+        if const_results !== nothing
+            @assert napplicable == nmatches(info) == length(const_results)
+            info = ConstCallInfo(info, const_results)
         end
-        @assert !(this_conditional isa Conditional || this_rt isa MustAlias) "invalid lattice element returned from inter-procedural context"
-        seen += 1
-        rettype = tmerge(𝕃ₚ, rettype, this_rt)
-        excttype = tmerge(𝕃ₚ, excttype, this_exct)
-        if has_conditional(𝕃ₚ, sv) && this_conditional !== Bottom && is_lattice_bool(𝕃ₚ, rettype) && fargs !== nothing
-            if conditionals === nothing
-                conditionals = Any[Bottom for _ in 1:length(argtypes)],
-                               Any[Bottom for _ in 1:length(argtypes)]
+
+        if seenall
+            if !fully_covering(matches) || any_ambig(matches)
+                # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+                all_effects = Effects(all_effects; nothrow=false)
+                exctype = exctype ⊔ₚ MethodError
             end
-            for i = 1:length(argtypes)
-                cnd = conditional_argtype(𝕃ᵢ, this_conditional, sig, argtypes, i)
-                conditionals[1][i] = tmerge(𝕃ᵢ, conditionals[1][i], cnd.thentype)
-                conditionals[2][i] = tmerge(𝕃ᵢ, conditionals[2][i], cnd.elsetype)
+            if sv isa InferenceState && fargs !== nothing
+                slotrefinements = collect_slot_refinements(𝕃ᵢ, applicable, argtypes, fargs, sv)
             end
-        end
-        if bail_out_call(interp, InferenceLoopState(sig, rettype, all_effects), sv)
-            add_remark!(interp, sv, "Call inference reached maximally imprecise information. Bailing on.")
-            break
-        end
-    end
-
-    if const_results !== nothing
-        @assert napplicable == nmatches(info) == length(const_results)
-        info = ConstCallInfo(info, const_results)
-    end
-
-    if seen ≠ napplicable
-        # there is unanalyzed candidate, widen type and effects to the top
-        rettype = excttype = Any
-        all_effects = Effects()
-    elseif isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
-            (!all(matches.fullmatches) || any_ambig(matches))
-        # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
-        all_effects = Effects(all_effects; nothrow=false)
-        excttype = tmerge(𝕃ₚ, excttype, MethodError)
-    end
-
-    rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
-
-    # Also considering inferring the compilation signature for this method, so
-    # it is available to the compiler in case it ends up needing it.
-    if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
-        (1 == seen == napplicable) && rettype !== Any && rettype !== Bottom &&
-        !is_removable_if_unused(all_effects))
-        match = applicable[1]::MethodMatch
-        method = match.method
-        sig = match.spec_types
-        mi = specialize_method(match; preexisting=true)
-        if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
-            csig = get_compileable_sig(method, sig, match.sparams)
-            if csig !== nothing && csig !== sig
-                abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)
+        else
+            # there is unanalyzed candidate, widen type and effects to the top
+            rettype = exctype = Any
+            all_effects = Effects()
+        end
+
+        rettype = from_interprocedural!(interp, rettype, sv, arginfo, conditionals)
+
+        # Also considering inferring the compilation signature for this method, so
+        # it is available to the compiler in case it ends up needing it.
+        if (isa(sv, InferenceState) && infer_compilation_signature(interp) &&
+            (seenall && 1 == napplicable) && rettype !== Any && rettype !== Bottom &&
+            !is_removable_if_unused(all_effects))
+            match = applicable[1]::MethodMatch
+            method = match.method
+            sig = match.spec_types
+            mi = specialize_method(match; preexisting=true)
+            if mi !== nothing && !const_prop_methodinstance_heuristic(interp, mi, arginfo, sv)
+                csig = get_compileable_sig(method, sig, match.sparams)
+                if csig !== nothing && csig !== sig
+                    abstract_call_method(interp, method, csig, match.sparams, multiple_matches, StmtInfo(false), sv)::Future
+                end
             end
         end
-    end
 
-    if call_result_unused(si) && !(rettype === Bottom)
-        add_remark!(interp, sv, "Call result type was widened because the return value is unused")
-        # We're mainly only here because the optimizer might want this code,
-        # but we ourselves locally don't typically care about it locally
-        # (beyond checking if it always throws).
-        # So avoid adding an edge, since we don't want to bother attempting
-        # to improve our result even if it does change (to always throw),
-        # and avoid keeping track of a more complex result type.
-        rettype = Any
-    end
-    add_call_backedges!(interp, rettype, all_effects, edges, matches, atype, sv)
-    if isa(sv, InferenceState)
-        # TODO (#48913) implement a proper recursion handling for irinterp:
-        # This works just because currently the `:terminate` condition guarantees that
-        # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
-        # We should revisit this once we have a better story for handling cycles in irinterp.
-        if !isempty(sv.pclimitations) # remove self, if present
-            delete!(sv.pclimitations, sv)
-            for caller in callers_in_cycle(sv)
-                delete!(sv.pclimitations, caller)
+        if call_result_unused(si) && !(rettype === Bottom)
+            add_remark!(interp, sv, "Call result type was widened because the return value is unused")
+            # We're mainly only here because the optimizer might want this code,
+            # but we ourselves locally don't typically care about it locally
+            # (beyond checking if it always throws).
+            # So avoid adding an edge, since we don't want to bother attempting
+            # to improve our result even if it does change (to always throw),
+            # and avoid keeping track of a more complex result type.
+            rettype = Any
+        end
+        any_slot_refined = slotrefinements !== nothing
+        add_call_backedges!(interp, rettype, all_effects, any_slot_refined, edges, matches, atype.contents, sv)
+        if isa(sv, InferenceState)
+            # TODO (#48913) implement a proper recursion handling for irinterp:
+            # This works just because currently the `:terminate` condition guarantees that
+            # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+            # We should revisit this once we have a better story for handling cycles in irinterp.
+            if !isempty(sv.pclimitations) # remove self, if present
+                delete!(sv.pclimitations, sv)
+                for caller in callers_in_cycle(sv)
+                    delete!(sv.pclimitations, caller)
+                end
             end
         end
-    end
 
-    return CallMeta(rettype, excttype, all_effects, info)
+        gfresult[] = CallMeta(rettype, exctype, all_effects, info, slotrefinements)
+        return true
+    end # infercalls
+    # start making progress on the first call
+    infercalls(interp, sv) || push!(sv.tasks, infercalls)
+    return gfresult
 end
 
 struct FailedMethodMatch
@@ -238,21 +261,47 @@ struct MethodMatches
     applicable::Vector{Any}
     info::MethodMatchInfo
     valid_worlds::WorldRange
-    mt::MethodTable
-    fullmatch::Bool
 end
-any_ambig(info::MethodMatchInfo) = info.results.ambig
+any_ambig(result::MethodLookupResult) = result.ambig
+any_ambig(info::MethodMatchInfo) = any_ambig(info.results)
 any_ambig(m::MethodMatches) = any_ambig(m.info)
+fully_covering(info::MethodMatchInfo) = info.fullmatch
+fully_covering(m::MethodMatches) = fully_covering(m.info)
+function add_uncovered_edges!(sv::AbsIntState, info::MethodMatchInfo, @nospecialize(atype))
+    fully_covering(info) || add_mt_backedge!(sv, info.mt, atype)
+    nothing
+end
+add_uncovered_edges!(sv::AbsIntState, matches::MethodMatches, @nospecialize(atype)) =
+    add_uncovered_edges!(sv, matches.info, atype)
 
 struct UnionSplitMethodMatches
     applicable::Vector{Any}
     applicable_argtypes::Vector{Vector{Any}}
     info::UnionSplitInfo
     valid_worlds::WorldRange
-    mts::Vector{MethodTable}
-    fullmatches::Vector{Bool}
 end
-any_ambig(m::UnionSplitMethodMatches) = any(any_ambig, m.info.matches)
+any_ambig(info::UnionSplitInfo) = any(any_ambig, info.split)
+any_ambig(m::UnionSplitMethodMatches) = any_ambig(m.info)
+fully_covering(info::UnionSplitInfo) = all(fully_covering, info.split)
+fully_covering(m::UnionSplitMethodMatches) = fully_covering(m.info)
+function add_uncovered_edges!(sv::AbsIntState, info::UnionSplitInfo, @nospecialize(atype))
+    all(fully_covering, info.split) && return nothing
+    # add mt backedges with removing duplications
+    for mt in uncovered_method_tables(info)
+        add_mt_backedge!(sv, mt, atype)
+    end
+end
+add_uncovered_edges!(sv::AbsIntState, matches::UnionSplitMethodMatches, @nospecialize(atype)) =
+    add_uncovered_edges!(sv, matches.info, atype)
+function uncovered_method_tables(info::UnionSplitInfo)
+    mts = MethodTable[]
+    for mminfo in info.split
+        fully_covering(mminfo) && continue
+        any(mt′::MethodTable->mt′===mminfo.mt, mts) && continue
+        push!(mts, mminfo.mt)
+    end
+    return mts
+end
 
 function find_method_matches(interp::AbstractInterpreter, argtypes::Vector{Any}, @nospecialize(atype);
                              max_union_splitting::Int = InferenceParams(interp).max_union_splitting,
@@ -274,41 +323,28 @@ function find_union_split_method_matches(interp::AbstractInterpreter, argtypes::
     applicable = Any[]
     applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
     valid_worlds = WorldRange()
-    mts = MethodTable[]
-    fullmatches = Bool[]
     for i in 1:length(split_argtypes)
         arg_n = split_argtypes[i]::Vector{Any}
         sig_n = argtypes_to_type(arg_n)
         mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
         mt === nothing && return FailedMethodMatch("Could not identify method table for call")
         mt = mt::MethodTable
-        matches = findall(sig_n, method_table(interp); limit = max_methods)
-        if matches === nothing
+        thismatches = findall(sig_n, method_table(interp); limit = max_methods)
+        if thismatches === nothing
             return FailedMethodMatch("For one of the union split cases, too many methods matched")
         end
-        push!(infos, MethodMatchInfo(matches))
-        for m in matches
+        for m in thismatches
             push!(applicable, m)
             push!(applicable_argtypes, arg_n)
         end
-        valid_worlds = intersect(valid_worlds, matches.valid_worlds)
-        thisfullmatch = any(match::MethodMatch->match.fully_covers, matches)
-        found = false
-        for (i, mt′) in enumerate(mts)
-            if mt′ === mt
-                fullmatches[i] &= thisfullmatch
-                found = true
-                break
-            end
-        end
-        if !found
-            push!(mts, mt)
-            push!(fullmatches, thisfullmatch)
-        end
+        valid_worlds = intersect(valid_worlds, thismatches.valid_worlds)
+        thisfullmatch = any(match::MethodMatch->match.fully_covers, thismatches)
+        thisinfo = MethodMatchInfo(thismatches, mt, thisfullmatch)
+        push!(infos, thisinfo)
     end
     info = UnionSplitInfo(infos)
     return UnionSplitMethodMatches(
-        applicable, applicable_argtypes, info, valid_worlds, mts, fullmatches)
+        applicable, applicable_argtypes, info, valid_worlds)
 end
 
 function find_simple_method_matches(interp::AbstractInterpreter, @nospecialize(atype), max_methods::Int)
@@ -323,10 +359,9 @@ function find_simple_method_matches(interp::AbstractInterpreter, @nospecialize(a
         # (assume this will always be true, so we don't compute / update valid age in this case)
         return FailedMethodMatch("Too many methods matched")
     end
-    info = MethodMatchInfo(matches)
     fullmatch = any(match::MethodMatch->match.fully_covers, matches)
-    return MethodMatches(
-        matches.matches, info, matches.valid_worlds, mt, fullmatch)
+    info = MethodMatchInfo(matches, mt, fullmatch)
+    return MethodMatches(matches.matches, info, matches.valid_worlds)
 end
 
 """
@@ -354,7 +389,7 @@ function from_interprocedural!(interp::AbstractInterpreter, @nospecialize(rt), s
                                arginfo::ArgInfo, @nospecialize(maybecondinfo))
     rt = collect_limitations!(rt, sv)
     if isa(rt, InterMustAlias)
-        rt = from_intermustalias(rt, arginfo, sv)
+        rt = from_intermustalias(typeinf_lattice(interp), rt, arginfo, sv)
     elseif is_lattice_bool(ipo_lattice(interp), rt)
         if maybecondinfo === nothing
             rt = widenconditional(rt)
@@ -374,12 +409,13 @@ function collect_limitations!(@nospecialize(typ), sv::InferenceState)
     return typ
 end
 
-function from_intermustalias(rt::InterMustAlias, arginfo::ArgInfo, sv::AbsIntState)
+function from_intermustalias(𝕃ᵢ::AbstractLattice, rt::InterMustAlias, arginfo::ArgInfo, sv::AbsIntState)
     fargs = arginfo.fargs
     if fargs !== nothing && 1 ≤ rt.slot ≤ length(fargs)
         arg = ssa_def_slot(fargs[rt.slot], sv)
         if isa(arg, SlotNumber)
             argtyp = widenslotwrapper(arginfo.argtypes[rt.slot])
+            ⊑ = partialorder(𝕃ᵢ)
             if rt.vartyp ⊑ argtyp
                 return MustAlias(arg, rt.vartyp, rt.fldidx, rt.fldtyp)
             else
@@ -395,10 +431,14 @@ function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::
     has_conditional(𝕃ᵢ, sv) || return widenconditional(rt)
     (; fargs, argtypes) = arginfo
     fargs === nothing && return widenconditional(rt)
+    if can_propagate_conditional(rt, argtypes)
+        return propagate_conditional(rt, argtypes[rt.slot]::Conditional)
+    end
     slot = 0
     alias = nothing
     thentype = elsetype = Any
     condval = maybe_extract_const_bool(rt)
+    ⊑, ⋤, ⊓ = partialorder(𝕃ᵢ), strictneqpartialorder(𝕃ᵢ), meet(𝕃ᵢ)
     for i in 1:length(fargs)
         # find the first argument which supports refinement,
         # and intersect all equivalent arguments with it
@@ -434,24 +474,24 @@ function from_interconditional(𝕃ᵢ::AbstractLattice, @nospecialize(rt), sv::
             end
             if condval === false
                 thentype = Bottom
-            elseif ⊑(𝕃ᵢ, new_thentype, thentype)
+            elseif new_thentype ⊑ thentype
                 thentype = new_thentype
             else
-                thentype = tmeet(𝕃ᵢ, thentype, widenconst(new_thentype))
+                thentype = thentype ⊓ widenconst(new_thentype)
             end
             if condval === true
                 elsetype = Bottom
-            elseif ⊑(𝕃ᵢ, new_elsetype, elsetype)
+            elseif new_elsetype ⊑ elsetype
                 elsetype = new_elsetype
             else
-                elsetype = tmeet(𝕃ᵢ, elsetype, widenconst(new_elsetype))
+                elsetype = elsetype ⊓ widenconst(new_elsetype)
             end
-            if (slot > 0 || condval !== false) && ⋤(𝕃ᵢ, thentype, old)
+            if (slot > 0 || condval !== false) && thentype ⋤ old
                 slot = id
                 if !(arg isa SlotNumber) && argtyp isa MustAlias
                     alias = argtyp
                 end
-            elseif (slot > 0 || condval !== true) && ⋤(𝕃ᵢ, elsetype, old)
+            elseif (slot > 0 || condval !== true) && elsetype ⋤ old
                 slot = id
                 if !(arg isa SlotNumber) && argtyp isa MustAlias
                     alias = argtyp
@@ -492,8 +532,37 @@ function conditional_argtype(𝕃ᵢ::AbstractLattice, @nospecialize(rt), @nospe
     end
 end
 
-function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype), all_effects::Effects,
-    edges::Vector{MethodInstance}, matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
+function collect_slot_refinements(𝕃ᵢ::AbstractLattice, applicable::Vector{Any},
+    argtypes::Vector{Any}, fargs::Vector{Any}, sv::InferenceState)
+    ⊏, ⊔ = strictpartialorder(𝕃ᵢ), join(𝕃ᵢ)
+    slotrefinements = nothing
+    for i = 1:length(fargs)
+        fargᵢ = fargs[i]
+        if fargᵢ isa SlotNumber
+            fidx = slot_id(fargᵢ)
+            argt = widenslotwrapper(argtypes[i])
+            if isvarargtype(argt)
+                argt = unwrapva(argt)
+            end
+            sigt = Bottom
+            for j = 1:length(applicable)
+                match = applicable[j]::MethodMatch
+                sigt = sigt ⊔ fieldtype(match.spec_types, i)
+            end
+            if sigt ⊏ argt # i.e. signature type is strictly more specific than the type of the argument slot
+                if slotrefinements === nothing
+                    slotrefinements = fill!(Vector{Any}(undef, length(sv.slottypes)), nothing)
+                end
+                slotrefinements[fidx] = sigt
+            end
+        end
+    end
+    return slotrefinements
+end
+
+function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype),
+    all_effects::Effects, any_slot_refined::Bool, edges::Vector{MethodInstance},
+    matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
     sv::AbsIntState)
     # don't bother to add backedges when both type and effects information are already
     # maximized to the top since a new method couldn't refine or widen them anyway
@@ -503,20 +572,16 @@ function add_call_backedges!(interp::AbstractInterpreter, @nospecialize(rettype)
         if !isoverlayed(method_table(interp))
             all_effects = Effects(all_effects; nonoverlayed=ALWAYS_FALSE)
         end
-        all_effects === Effects() && return nothing
+        if all_effects === Effects() && !any_slot_refined
+            return nothing
+        end
     end
     for edge in edges
         add_backedge!(sv, edge)
     end
     # also need an edge to the method table in case something gets
     # added that did not intersect with any existing method
-    if isa(matches, MethodMatches)
-        matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype)
-    else
-        for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
-            thisfullmatch || add_mt_backedge!(sv, mt, atype)
-        end
-    end
+    add_uncovered_edges!(sv, matches, atype)
     return nothing
 end
 
@@ -529,9 +594,9 @@ function abstract_call_method(interp::AbstractInterpreter,
                               hardlimit::Bool, si::StmtInfo, sv::AbsIntState)
     sigtuple = unwrap_unionall(sig)
     sigtuple isa DataType ||
-        return MethodCallResult(Any, Any, false, false, nothing, Effects())
+        return Future(MethodCallResult(Any, Any, false, false, nothing, Effects()))
     all(@nospecialize(x) -> valid_as_lattice(unwrapva(x), true), sigtuple.parameters) ||
-        return MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS) # catch bad type intersections early
+        return Future(MethodCallResult(Union{}, Any, false, false, nothing, EFFECTS_THROWS)) # catch bad type intersections early
 
     if is_nospecializeinfer(method)
         sig = get_nospecializeinfer_sig(method, sig, sparams)
@@ -556,7 +621,7 @@ function abstract_call_method(interp::AbstractInterpreter,
                     # we have a self-cycle in the call-graph, but not in the inference graph (typically):
                     # break this edge now (before we record it) by returning early
                     # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                    return MethodCallResult(Any, Any, true, true, nothing, Effects())
+                    return Future(MethodCallResult(Any, Any, true, true, nothing, Effects()))
                 end
                 topmost = nothing
                 edgecycle = true
@@ -611,17 +676,27 @@ function abstract_call_method(interp::AbstractInterpreter,
                 # since it's very unlikely that we'll try to inline this,
                 # or want make an invoke edge to its calling convention return type.
                 # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                return MethodCallResult(Any, Any, true, true, nothing, Effects())
+                return Future(MethodCallResult(Any, Any, true, true, nothing, Effects()))
             end
             add_remark!(interp, sv, washardlimit ? RECURSION_MSG_HARDLIMIT : RECURSION_MSG)
             # TODO (#48913) implement a proper recursion handling for irinterp:
-            # This works just because currently the `:terminate` condition guarantees that
-            # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
+            # This works just because currently the `:terminate` condition usually means this is unreachable here
+            # for irinterp because there are not unresolved cycles, but it's not a good solution.
             # We should revisit this once we have a better story for handling cycles in irinterp.
-            if isa(topmost, InferenceState)
+            if isa(sv, InferenceState)
+                # since the hardlimit is against the edge to the parent frame,
+                # we should try to poison the whole edge, not just the topmost frame
                 parentframe = frame_parent(topmost)
-                if isa(sv, InferenceState) && isa(parentframe, InferenceState)
-                    poison_callstack!(sv, parentframe === nothing ? topmost : parentframe)
+                while !isa(parentframe, InferenceState)
+                    # attempt to find a parent frame that can handle this LimitedAccuracy result correctly
+                    # so we don't try to cache this incomplete intermediate result
+                    parentframe === nothing && break
+                    parentframe = frame_parent(parentframe)
+                end
+                if isa(parentframe, InferenceState)
+                    poison_callstack!(sv, parentframe)
+                elseif isa(topmost, InferenceState)
+                    poison_callstack!(sv, topmost)
                 end
             end
             # n.b. this heuristic depends on the non-local state, so we must record the limit later
@@ -657,31 +732,7 @@ function abstract_call_method(interp::AbstractInterpreter,
         sparams = recomputed[2]::SimpleVector
     end
 
-    (; rt, exct, edge, effects, volatile_inf_result) = typeinf_edge(interp, method, sig, sparams, sv)
-
-    if edge === nothing
-        edgecycle = edgelimited = true
-    end
-
-    # we look for the termination effect override here as well, since the :terminates effect
-    # may have been tainted due to recursion at this point even if it's overridden
-    if is_effect_overridden(sv, :terminates_globally)
-        # this frame is known to terminate
-        effects = Effects(effects, terminates=true)
-    elseif is_effect_overridden(method, :terminates_globally)
-        # this edge is known to terminate
-        effects = Effects(effects; terminates=true)
-    elseif edgecycle
-        # Some sort of recursion was detected.
-        if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
-            # no `MethodInstance` cycles -- don't taint :terminate
-        else
-            # we cannot guarantee that the call will terminate
-            effects = Effects(effects; terminates=false)
-        end
-    end
-
-    return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result)
+    return typeinf_edge(interp, method, sig, sparams, sv, edgecycle, edgelimited)
 end
 
 function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
@@ -709,10 +760,10 @@ function edge_matches_sv(interp::AbstractInterpreter, frame::AbsIntState,
         # otherwise, we don't
 
         # check in the cycle list first
-        # all items in here are mutual parents of all others
+        # all items in here are considered mutual parents of all others
         if !any(p::AbsIntState->matches_sv(p, sv), callers_in_cycle(frame))
             let parent = frame_parent(frame)
-                parent !== nothing || return false
+                parent === nothing && return false
                 (is_cached(parent) || frame_parent(parent) !== nothing) || return false
                 matches_sv(parent, sv) || return false
             end
@@ -896,7 +947,7 @@ function concrete_eval_eligible(interp::AbstractInterpreter,
         end
     end
     mi = result.edge
-    if mi !== nothing && is_foldable(effects)
+    if mi !== nothing && is_foldable(effects, #=check_rtcall=#true)
         if f !== nothing && is_all_const_arg(arginfo, #=start=#2)
             if (is_nonoverlayed(interp) || is_nonoverlayed(effects) ||
                 # Even if overlay methods are involved, when `:consistent_overlay` is
@@ -943,7 +994,7 @@ collect_const_args(arginfo::ArgInfo, start::Int) = collect_const_args(arginfo.ar
 function collect_const_args(argtypes::Vector{Any}, start::Int)
     return Any[ let a = widenslotwrapper(argtypes[i])
                     isa(a, Const) ? a.val :
-                    isconstType(a) ? (a::DataType).parameters[1] :
+                    isconstType(a) ? a.parameters[1] :
                     (a::DataType).instance
                 end for i = start:length(argtypes) ]
 end
@@ -990,12 +1041,12 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
     match::MethodMatch, sv::AbsIntState)
     method = match.method
     force = force_const_prop(interp, f, method)
-    if !const_prop_entry_heuristic(interp, result, si, sv, force)
-        # N.B. remarks are emitted within `const_prop_entry_heuristic`
+    if !const_prop_rettype_heuristic(interp, result, si, sv, force)
+        # N.B. remarks are emitted within `const_prop_rettype_heuristic`
         return nothing
     end
     if !const_prop_argument_heuristic(interp, arginfo, sv)
-        add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics")
+        add_remark!(interp, sv, "[constprop] Disabled by argument heuristics")
         return nothing
     end
     all_overridden = is_all_overridden(interp, arginfo, sv)
@@ -1017,28 +1068,28 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter,
     return mi
 end
 
-function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult,
-                                    si::StmtInfo, sv::AbsIntState, force::Bool)
-    if result.rt isa LimitedAccuracy
+function const_prop_rettype_heuristic(interp::AbstractInterpreter, result::MethodCallResult,
+                                      si::StmtInfo, sv::AbsIntState, force::Bool)
+    rt = result.rt
+    if rt isa LimitedAccuracy
         # optimizations like inlining are disabled for limited frames,
         # thus there won't be much benefit in constant-prop' here
         # N.B. don't allow forced constprop' for safety (xref #52763)
-        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (limited accuracy)")
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (limited accuracy)")
         return false
     elseif force
         return true
     elseif call_result_unused(si) && result.edgecycle
-        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)")
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (edgecycle with unused result)")
         return false
     end
     # check if this return type is improvable (i.e. whether it's possible that with more
     # information, we might get a more precise type)
-    rt = result.rt
     if isa(rt, Type)
         # could always be improved to `Const`, `PartialStruct` or just a more precise type,
         # unless we're already at `Bottom`
         if rt === Bottom
-            add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (erroneous result)")
+            add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (erroneous result)")
             return false
         end
         return true
@@ -1047,14 +1098,15 @@ function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodC
         return true
     elseif isa(rt, Const)
         if is_nothrow(result.effects)
-            add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (nothrow const)")
+            add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (nothrow const)")
             return false
         end
         # Could still be improved to Bottom (or at least could see the effects improved)
         return true
+    else
+        add_remark!(interp, sv, "[constprop] Disabled by rettype heuristic (unimprovable result)")
+        return false
     end
-    add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (unimprovable result)")
-    return false
 end
 
 # determines heuristically whether if constant propagation can be worthwhile
@@ -1113,16 +1165,16 @@ end
 function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method)
     return is_aggressive_constprop(method) ||
            InferenceParams(interp).aggressive_constant_propagation ||
-           istopfunction(f, :getproperty) ||
-           istopfunction(f, :setproperty!)
+           typename(typeof(f)).constprop_heuristic === Core.FORCE_CONST_PROP
 end
 
 function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecialize(f),
     arginfo::ArgInfo, all_overridden::Bool, sv::AbsIntState)
     argtypes = arginfo.argtypes
+    heuristic = typename(typeof(f)).constprop_heuristic
     if length(argtypes) > 1
         𝕃ᵢ = typeinf_lattice(interp)
-        if istopfunction(f, :getindex) || istopfunction(f, :setindex!)
+        if heuristic === Core.ARRAY_INDEX_HEURISTIC
             arrty = argtypes[2]
             # don't propagate constant index into indexing of non-constant array
             if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty)
@@ -1135,17 +1187,14 @@ function const_prop_function_heuristic(interp::AbstractInterpreter, @nospecializ
             elseif ⊑(𝕃ᵢ, arrty, Array) || ⊑(𝕃ᵢ, arrty, GenericMemory)
                 return false
             end
-        elseif istopfunction(f, :iterate)
+        elseif heuristic === Core.ITERATE_HEURISTIC
             itrty = argtypes[2]
             if ⊑(𝕃ᵢ, itrty, Array) || ⊑(𝕃ᵢ, itrty, GenericMemory)
                 return false
             end
         end
     end
-    if !all_overridden && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) ||
-                           istopfunction(f, :(==)) || istopfunction(f, :!=) ||
-                           istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) ||
-                           istopfunction(f, :<<) || istopfunction(f, :>>))
+    if !all_overridden && heuristic === Core.SAMETYPE_HEURISTIC
         # it is almost useless to inline the op when all the same type,
         # but highly worthwhile to inline promote of a constant
         length(argtypes) > 2 || return false
@@ -1216,7 +1265,7 @@ function semi_concrete_eval_call(interp::AbstractInterpreter,
     if code !== nothing
         irsv = IRInterpretationState(interp, code, mi, arginfo.argtypes, world)
         if irsv !== nothing
-            irsv.parent = sv
+            assign_parentchild!(irsv, sv)
             rt, (nothrow, noub) = ir_abstract_constant_propagation(interp, irsv)
             @assert !(rt isa Conditional || rt isa MustAlias) "invalid lattice element returned from irinterp"
             if !(isa(rt, Type) && hasintersect(rt, Bool))
@@ -1233,7 +1282,7 @@ function semi_concrete_eval_call(interp::AbstractInterpreter,
                     effects = Effects(effects; noub=ALWAYS_TRUE)
                 end
                 exct = refine_exception_type(result.exct, effects)
-                return ConstCallResults(rt, exct, SemiConcreteResult(mi, ir, effects), effects, mi)
+                return ConstCallResults(rt, exct, SemiConcreteResult(mi, ir, effects, spec_info(irsv)), effects, mi)
             end
         end
     end
@@ -1245,7 +1294,7 @@ const_prop_result(inf_result::InferenceResult) =
                      inf_result.ipo_effects, inf_result.linfo)
 
 # return cached result of constant analysis
-return_cached_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
+return_localcache_result(::AbstractInterpreter, inf_result::InferenceResult, ::AbsIntState) =
     const_prop_result(inf_result)
 
 function compute_forwarded_argtypes(interp::AbstractInterpreter, arginfo::ArgInfo, sv::AbsIntState)
@@ -1275,7 +1324,7 @@ function const_prop_call(interp::AbstractInterpreter,
             return nothing
         end
         @assert inf_result.linfo === mi "MethodInstance for cached inference result does not match"
-        return return_cached_result(interp, inf_result, sv)
+        return return_localcache_result(interp, inf_result, sv)
     end
     overridden_by_const = falses(length(argtypes))
     for i = 1:length(argtypes)
@@ -1289,16 +1338,22 @@ function const_prop_call(interp::AbstractInterpreter,
     end
     # perform fresh constant prop'
     inf_result = InferenceResult(mi, argtypes, overridden_by_const)
-    frame = InferenceState(inf_result, #=cache_mode=#:local, interp)
+    frame = InferenceState(inf_result, #=cache_mode=#:local, interp) # TODO: this should also be converted to a stackless Future
     if frame === nothing
         add_remark!(interp, sv, "[constprop] Could not retrieve the source")
         return nothing # this is probably a bad generated function (unsound), but just ignore it
     end
-    frame.parent = sv
+    assign_parentchild!(frame, sv)
     if !typeinf(interp, frame)
         add_remark!(interp, sv, "[constprop] Fresh constant inference hit a cycle")
+        @assert frame.frameid != 0 && frame.cycleid == frame.frameid
+        callstack = frame.callstack::Vector{AbsIntState}
+        @assert callstack[end] === frame && length(callstack) == frame.frameid
+        pop!(callstack)
         return nothing
     end
+    @assert frame.frameid != 0 && frame.cycleid == frame.frameid
+    @assert frame.parentid == sv.frameid
     @assert inf_result.result !== nothing
     # ConditionalSimpleArgtypes is allowed, because the only case in which it modifies
     # the argtypes is when one of the argtypes is a `Conditional`, which case
@@ -1326,7 +1381,6 @@ function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance,
     given_argtypes = Vector{Any}(undef, length(argtypes))
     def = mi.def::Method
     nargs = Int(def.nargs)
-    local condargs = nothing
     for i in 1:length(argtypes)
         argtype = argtypes[i]
         # forward `Conditional` if it conveys a constraint on any other argument
@@ -1343,10 +1397,6 @@ function matching_cache_argtypes(𝕃::AbstractLattice, mi::MethodInstance,
                     # TODO bail out here immediately rather than just propagating Bottom ?
                     given_argtypes[i] = Bottom
                 else
-                    if condargs === nothing
-                        condargs = Tuple{Int,Int}[]
-                    end
-                    push!(condargs, (slotid, i))
                     given_argtypes[i] = Conditional(slotid, thentype, elsetype)
                 end
                 continue
@@ -1430,9 +1480,9 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
         widet = typ.typ
         if isa(widet, DataType)
             if widet.name === Tuple.name
-                return AbstractIterationResult(typ.fields, nothing)
+                return Future(AbstractIterationResult(typ.fields, nothing))
             elseif widet.name === _NAMEDTUPLE_NAME
-                return AbstractIterationResult(typ.fields, nothing)
+                return Future(AbstractIterationResult(typ.fields, nothing))
             end
         end
     end
@@ -1440,7 +1490,7 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
     if isa(typ, Const)
         val = typ.val
         if isa(val, SimpleVector) || isa(val, Tuple) || isa(val, NamedTuple)
-            return AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing) # avoid making a tuple Generator here!
+            return Future(AbstractIterationResult(Any[ Const(val[i]) for i in 1:length(val) ], nothing)) # avoid making a tuple Generator here!
         end
     end
 
@@ -1457,18 +1507,18 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
         # refine the Union to remove elements that are not valid tags for objects
         filter!(@nospecialize(x) -> valid_as_lattice(x, true), utis)
         if length(utis) == 0
-            return AbstractIterationResult(Any[], nothing) # oops, this statement was actually unreachable
+            return Future(AbstractIterationResult(Any[], nothing)) # oops, this statement was actually unreachable
         elseif length(utis) == 1
             tti = utis[1]
             tti0 = rewrap_unionall(tti, tti0)
         else
             if any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
-                return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+                return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
             end
             ltp = length((utis[1]::DataType).parameters)
             for t in utis
                 if length((t::DataType).parameters) != ltp
-                    return AbstractIterationResult(Any[Vararg{Any}], nothing)
+                    return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
                 end
             end
             result = Any[ Union{} for _ in 1:ltp ]
@@ -1479,14 +1529,14 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
                     result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0))
                 end
             end
-            return AbstractIterationResult(result, nothing)
+            return Future(AbstractIterationResult(result, nothing))
         end
     end
     if tti0 <: Tuple
         if isa(tti0, DataType)
-            return AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing)
+            return Future(AbstractIterationResult(Any[ p for p in tti0.parameters ], nothing))
         elseif !isa(tti, DataType)
-            return AbstractIterationResult(Any[Vararg{Any}], nothing)
+            return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
         else
             len = length(tti.parameters)
             last = tti.parameters[len]
@@ -1499,17 +1549,17 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
                     elts[len] = Vararg{elts[len]}
                 end
             end
-            return AbstractIterationResult(elts, nothing)
+            return Future(AbstractIterationResult(elts, nothing))
         end
     elseif tti0 === SimpleVector
-        return AbstractIterationResult(Any[Vararg{Any}], nothing)
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing))
     elseif tti0 === Any
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
     elseif tti0 <: Array || tti0 <: GenericMemory
         if eltype(tti0) === Union{}
-            return AbstractIterationResult(Any[], nothing)
+            return Future(AbstractIterationResult(Any[], nothing))
         end
-        return AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing)
+        return Future(AbstractIterationResult(Any[Vararg{eltype(tti0)}], nothing))
     else
         return abstract_iteration(interp, itft, typ, sv)
     end
@@ -1520,95 +1570,144 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
     if isa(itft, Const)
         iteratef = itft.val
     else
-        return AbstractIterationResult(Any[Vararg{Any}], nothing, Effects())
+        return Future(AbstractIterationResult(Any[Vararg{Any}], nothing, Effects()))
     end
     @assert !isvarargtype(itertype)
-    call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)
-    stateordonet = call.rt
-    info = call.info
-    # Return Bottom if this is not an iterator.
-    # WARNING: Changes to the iteration protocol must be reflected here,
-    # this is not just an optimization.
-    # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
-    stateordonet === Bottom && return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call.effects, info)], true))
-    valtype = statetype = Bottom
-    ret = Any[]
-    calls = CallMeta[call]
-    stateordonet_widened = widenconst(stateordonet)
-    𝕃ᵢ = typeinf_lattice(interp)
 
-    # Try to unroll the iteration up to max_tuple_splat, which covers any finite
-    # length iterators, or interesting prefix
-    while true
-        if stateordonet_widened === Nothing
-            return AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
-        end
-        if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
-            break
-        end
-        if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
-            break
-        end
-        nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
-        # If there's no new information in this statetype, don't bother continuing,
-        # the iterator won't be finite.
-        if ⊑(𝕃ᵢ, nstatetype, statetype)
-            return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
-        end
-        valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
-        push!(ret, valtype)
-        statetype = nstatetype
-        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
-        stateordonet = call.rt
+    iterateresult = Future{AbstractIterationResult}()
+    call1future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), StmtInfo(true), sv)::Future
+    function inferiterate(interp, sv)
+        call1 = call1future[]
+        stateordonet = call1.rt
+        # Return Bottom if this is not an iterator.
+        # WARNING: Changes to the iteration protocol must be reflected here,
+        # this is not just an optimization.
+        # TODO: this doesn't realize that Array, GenericMemory, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
+        if stateordonet === Bottom
+            iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, Any, call1.effects, call1.info)], true))
+            return true
+        end
         stateordonet_widened = widenconst(stateordonet)
-        push!(calls, call)
-    end
-    # From here on, we start asking for results on the widened types, rather than
-    # the precise (potentially const) state type
-    # statetype and valtype are reinitialized in the first iteration below from the
-    # (widened) stateordonet, which has not yet been fully analyzed in the loop above
-    valtype = statetype = Bottom
-    may_have_terminated = Nothing <: stateordonet_widened
-    while valtype !== Any
-        nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
-        if nounion !== Union{} && !isa(nounion, DataType)
-            # nounion is of a type we cannot handle
-            valtype = Any
-            break
-        end
-        if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
-            # reached a fixpoint or iterator failed/gave invalid answer
-            if !hasintersect(stateordonet_widened, Nothing)
-                # ... but cannot terminate
-                if !may_have_terminated
-                    #  ... and cannot have terminated prior to this loop
-                    return AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
-                else
-                    # iterator may have terminated prior to this loop, but not during it
-                    valtype = Bottom
+        calls = CallMeta[call1]
+        valtype = statetype = Bottom
+        ret = Any[]
+        𝕃ᵢ = typeinf_lattice(interp)
+        may_have_terminated = false
+        local call2future::Future{CallMeta}
+
+        nextstate::UInt8 = 0x0
+        function inferiterate_2arg(interp, sv)
+            if nextstate === 0x1
+                nextstate = 0xff
+                @goto state1
+            elseif nextstate === 0x2
+                nextstate = 0xff
+                @goto state2
+            else
+                @assert nextstate === 0x0
+                nextstate = 0xff
+            end
+
+            # Try to unroll the iteration up to max_tuple_splat, which covers any finite
+            # length iterators, or interesting prefix
+            while true
+                if stateordonet_widened === Nothing
+                    iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, true))
+                    return true
+                end
+                if Nothing <: stateordonet_widened || length(ret) >= InferenceParams(interp).max_tuple_splat
+                    break
+                end
+                if !isa(stateordonet_widened, DataType) || !(stateordonet_widened <: Tuple) || isvatuple(stateordonet_widened) || length(stateordonet_widened.parameters) != 2
+                    break
+                end
+                nstatetype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(2))
+                # If there's no new information in this statetype, don't bother continuing,
+                # the iterator won't be finite.
+                if ⊑(𝕃ᵢ, nstatetype, statetype)
+                    iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), EFFECTS_THROWS)
+                    return true
+                end
+                valtype = getfield_tfunc(𝕃ᵢ, stateordonet, Const(1))
+                push!(ret, valtype)
+                statetype = nstatetype
+                call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future
+                if !isready(call2future)
+                    nextstate = 0x1
+                    return false
+                    @label state1
+                end
+                let call = call2future[]
+                    push!(calls, call)
+                    stateordonet = call.rt
+                    stateordonet_widened = widenconst(stateordonet)
                 end
             end
-            break
-        end
-        valtype = tmerge(valtype, nounion.parameters[1])
-        statetype = tmerge(statetype, nounion.parameters[2])
-        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)
-        push!(calls, call)
-        stateordonet = call.rt
-        stateordonet_widened = widenconst(stateordonet)
-    end
-    if valtype !== Union{}
-        push!(ret, Vararg{valtype})
+            # From here on, we start asking for results on the widened types, rather than
+            # the precise (potentially const) state type
+            # statetype and valtype are reinitialized in the first iteration below from the
+            # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+            valtype = statetype = Bottom
+            may_have_terminated = Nothing <: stateordonet_widened
+            while valtype !== Any
+                nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
+                if nounion !== Union{} && !isa(nounion, DataType)
+                    # nounion is of a type we cannot handle
+                    valtype = Any
+                    break
+                end
+                if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+                    # reached a fixpoint or iterator failed/gave invalid answer
+                    if !hasintersect(stateordonet_widened, Nothing)
+                        # ... but cannot terminate
+                        if may_have_terminated
+                            # ... and iterator may have terminated prior to this loop, but not during it
+                            valtype = Bottom
+                        else
+                            #  ... or cannot have terminated prior to this loop
+                            iterateresult[] = AbstractIterationResult(Any[Bottom], AbstractIterationInfo(calls, false), Effects())
+                            return true
+                        end
+                    end
+                    break
+                end
+                valtype = tmerge(valtype, nounion.parameters[1])
+                statetype = tmerge(statetype, nounion.parameters[2])
+                call2future = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), StmtInfo(true), sv)::Future
+                if !isready(call2future)
+                    nextstate = 0x2
+                    return false
+                    @label state2
+                end
+                let call = call2future[]
+                    push!(calls, call)
+                    stateordonet = call.rt
+                    stateordonet_widened = widenconst(stateordonet)
+                end
+            end
+            if valtype !== Union{}
+                push!(ret, Vararg{valtype})
+            end
+            iterateresult[] = AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
+            return true
+        end # inferiterate_2arg
+        # continue making progress as much as possible, on iterate(arg, state)
+        inferiterate_2arg(interp, sv) || push!(sv.tasks, inferiterate_2arg)
+        return true
+    end # inferiterate
+    # continue making progress as soon as possible, on iterate(arg)
+    if !(isready(call1future) && inferiterate(interp, sv))
+        push!(sv.tasks, inferiterate)
     end
-    return AbstractIterationResult(ret, AbstractIterationInfo(calls, false))
+    return iterateresult
 end
 
 # do apply(af, fargs...), where af is a function value
 function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo,
                         sv::AbsIntState, max_methods::Int=get_max_methods(interp, sv))
-    itft = argtype_by_index(argtypes, 2)
+    itft = Core.Box(argtype_by_index(argtypes, 2))
     aft = argtype_by_index(argtypes, 3)
-    (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    (itft.contents === Bottom || aft === Bottom) && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     aargtypes = argtype_tail(argtypes, 4)
     aftw = widenconst(aft)
     if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw))
@@ -1616,100 +1715,155 @@ function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, si::
             add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type")
             # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting
             # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin
-            return CallMeta(Any, Any, Effects(), NoCallInfo())
+            return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
         end
     end
     res = Union{}
-    nargs = length(aargtypes)
     splitunions = 1 < unionsplitcost(typeinf_lattice(interp), aargtypes) <= InferenceParams(interp).max_apply_union_enum
-    ctypes = [Any[aft]]
-    infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
-    effects = EFFECTS_TOTAL
-    for i = 1:nargs
-        ctypes´ = Vector{Any}[]
-        infos′ = Vector{MaybeAbstractIterationInfo}[]
-        for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
-            if !isvarargtype(ti)
-                (;cti, info, ai_effects) = precise_container_type(interp, itft, ti, sv)
-            else
-                (;cti, info, ai_effects) = precise_container_type(interp, itft, unwrapva(ti), sv)
-                # We can't represent a repeating sequence of the same types,
-                # so tmerge everything together to get one type that represents
-                # everything.
-                argt = cti[end]
-                if isvarargtype(argt)
-                    argt = unwrapva(argt)
+    ctypes::Vector{Vector{Any}} = [Any[aft]]
+    infos::Vector{Vector{MaybeAbstractIterationInfo}} = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
+    all_effects::Effects = EFFECTS_TOTAL
+    retinfos = ApplyCallInfo[]
+    retinfo = UnionSplitApplyCallInfo(retinfos)
+    exctype = Union{}
+    ctypes´ = Vector{Any}[]
+    infos´ = Vector{MaybeAbstractIterationInfo}[]
+    local ti, argtypesi
+    local ctfuture::Future{AbstractIterationResult}
+    local callfuture::Future{CallMeta}
+
+    applyresult = Future{CallMeta}()
+    # split the rest into a resumable state machine
+    i::Int = 1
+    j::Int = 1
+    nextstate::UInt8 = 0x0
+    function infercalls(interp, sv)
+        # n.b. Remember that variables will lose their values across restarts,
+        # so be sure to manually hoist any values that must be preserved and do
+        # not rely on program order.
+        # This is a little more complex than the closure continuations often used elsewhere, but avoids needing to manage all of that indentation
+        if nextstate === 0x1
+            nextstate = 0xff
+            @goto state1
+        elseif nextstate === 0x2
+            nextstate = 0xff
+            @goto state2
+        elseif nextstate === 0x3
+            nextstate = 0xff
+            @goto state3
+        else
+            @assert nextstate === 0x0
+            nextstate = 0xff
+        end
+        while i <= length(aargtypes)
+            argtypesi = (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
+            i += 1
+            j = 1
+            while j <= length(argtypesi)
+                ti = argtypesi[j]
+                j += 1
+                if !isvarargtype(ti)
+                    ctfuture = precise_container_type(interp, itft.contents, ti, sv)::Future
+                    if !isready(ctfuture)
+                        nextstate = 0x1
+                        return false
+                        @label state1
+                    end
+                    (;cti, info, ai_effects) = ctfuture[]
+                else
+                    ctfuture = precise_container_type(interp, itft.contents, unwrapva(ti), sv)::Future
+                    if !isready(ctfuture)
+                        nextstate = 0x2
+                        return false
+                        @label state2
+                    end
+                    (;cti, info, ai_effects) = ctfuture[]
+                    # We can't represent a repeating sequence of the same types,
+                    # so tmerge everything together to get one type that represents
+                    # everything.
+                    argt = cti[end]
+                    if isvarargtype(argt)
+                        argt = unwrapva(argt)
+                    end
+                    for k in 1:(length(cti)-1)
+                        argt = tmerge(argt, cti[k])
+                    end
+                    cti = Any[Vararg{argt}]
                 end
-                for i in 1:(length(cti)-1)
-                    argt = tmerge(argt, cti[i])
+                all_effects = merge_effects(all_effects, ai_effects)
+                if info !== nothing
+                    for call in info.each
+                        all_effects = merge_effects(all_effects, call.effects)
+                    end
+                end
+                if any(@nospecialize(t) -> t === Bottom, cti)
+                    continue
+                end
+                for k = 1:length(ctypes)
+                    ct = ctypes[k]
+                    if isvarargtype(ct[end])
+                        # This is vararg, we're not gonna be able to do any inlining,
+                        # drop the info
+                        info = nothing
+                        tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
+                        push!(ctypes´, push!(ct[1:(end - 1)], tail))
+                    else
+                        push!(ctypes´, append!(ct[:], cti))
+                    end
+                    push!(infos´, push!(copy(infos[k]), info))
                 end
-                cti = Any[Vararg{argt}]
             end
-            effects = merge_effects(effects, ai_effects)
-            if info !== nothing
-                for call in info.each
-                    effects = merge_effects(effects, call.effects)
+            # swap for the new array and empty the temporary one
+            ctypes´, ctypes = ctypes, ctypes´
+            infos´, infos = infos, infos´
+            empty!(ctypes´)
+            empty!(infos´)
+        end
+        all_effects.nothrow || (exctype = Any)
+
+        i = 1
+        while i <= length(ctypes)
+            ct = ctypes[i]
+            lct = length(ct)
+            # truncate argument list at the first Vararg
+            for k = 1:lct-1
+                cti = ct[k]
+                if isvarargtype(cti)
+                    ct[k] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(k+1):lct])
+                    resize!(ct, k)
+                    break
                 end
             end
-            if any(@nospecialize(t) -> t === Bottom, cti)
-                continue
+            callfuture = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)::Future
+            if !isready(callfuture)
+                nextstate = 0x3
+                return false
+                @label state3
             end
-            for j = 1:length(ctypes)
-                ct = ctypes[j]::Vector{Any}
-                if isvarargtype(ct[end])
-                    # This is vararg, we're not gonna be able to do any inlining,
-                    # drop the info
-                    info = nothing
-                    tail = tuple_tail_elem(typeinf_lattice(interp), unwrapva(ct[end]), cti)
-                    push!(ctypes´, push!(ct[1:(end - 1)], tail))
-                else
-                    push!(ctypes´, append!(ct[:], cti))
+            let (; info, rt, exct, effects) = callfuture[]
+                push!(retinfos, ApplyCallInfo(info, infos[i]))
+                res = tmerge(typeinf_lattice(interp), res, rt)
+                exctype = tmerge(typeinf_lattice(interp), exctype, exct)
+                all_effects = merge_effects(all_effects, effects)
+                if i < length(ctypes) && bail_out_apply(interp, InferenceLoopState(ctypes[i], res, all_effects), sv)
+                    add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
+                    # there is unanalyzed candidate, widen type and effects to the top
+                    let retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+                        applyresult[] = CallMeta(Any, Any, Effects(), retinfo)
+                        return true
+                    end
                 end
-                push!(infos′, push!(copy(infos[j]), info))
-            end
-        end
-        ctypes = ctypes´
-        infos = infos′
-    end
-    retinfos = ApplyCallInfo[]
-    retinfo = UnionSplitApplyCallInfo(retinfos)
-    napplicable = length(ctypes)
-    seen = 0
-    exct = effects.nothrow ? Union{} : Any
-    for i = 1:napplicable
-        ct = ctypes[i]
-        arginfo = infos[i]
-        lct = length(ct)
-        # truncate argument list at the first Vararg
-        for i = 1:lct-1
-            cti = ct[i]
-            if isvarargtype(cti)
-                ct[i] = tuple_tail_elem(typeinf_lattice(interp), unwrapva(cti), ct[(i+1):lct])
-                resize!(ct, i)
-                break
             end
+            i += 1
         end
-        call = abstract_call(interp, ArgInfo(nothing, ct), si, sv, max_methods)
-        seen += 1
-        push!(retinfos, ApplyCallInfo(call.info, arginfo))
-        res = tmerge(typeinf_lattice(interp), res, call.rt)
-        exct = tmerge(typeinf_lattice(interp), exct, call.exct)
-        effects = merge_effects(effects, call.effects)
-        if bail_out_apply(interp, InferenceLoopState(ct, res, effects), sv)
-            add_remark!(interp, sv, "_apply_iterate inference reached maximally imprecise information. Bailing on.")
-            break
-        end
-    end
-    if seen ≠ napplicable
-        # there is unanalyzed candidate, widen type and effects to the top
-        res = Any
-        exct = Any
-        effects = Effects()
-        retinfo = NoCallInfo() # NOTE this is necessary to prevent the inlining processing
+        # TODO: Add a special info type to capture all the iteration info.
+        # For now, only propagate info if we don't also union-split the iteration
+        applyresult[] = CallMeta(res, exctype, all_effects, retinfo)
+        return true
     end
-    # TODO: Add a special info type to capture all the iteration info.
-    # For now, only propagate info if we don't also union-split the iteration
-    return CallMeta(res, exct, effects, retinfo)
+    # start making progress on the first call
+    infercalls(interp, sv) || push!(sv.tasks, infercalls)
+    return applyresult
 end
 
 function argtype_by_index(argtypes::Vector{Any}, i::Int)
@@ -1794,7 +1948,7 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
     @nospecialize f
     la = length(argtypes)
     𝕃ᵢ = typeinf_lattice(interp)
-    ⊑ᵢ = ⊑(𝕃ᵢ)
+    ⊑, ⊏, ⊔, ⊓ = partialorder(𝕃ᵢ), strictpartialorder(𝕃ᵢ), join(𝕃ᵢ), meet(𝕃ᵢ)
     if has_conditional(𝕃ᵢ, sv) && f === Core.ifelse && fargs isa Vector{Any} && la == 4
         cnd = argtypes[2]
         if isa(cnd, Conditional)
@@ -1809,12 +1963,12 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
                 a = ssa_def_slot(fargs[3], sv)
                 b = ssa_def_slot(fargs[4], sv)
                 if isa(a, SlotNumber) && cnd.slot == slot_id(a)
-                    tx = (cnd.thentype ⊑ᵢ tx ? cnd.thentype : tmeet(𝕃ᵢ, tx, widenconst(cnd.thentype)))
+                    tx = (cnd.thentype ⊑ tx ? cnd.thentype : tx ⊓ widenconst(cnd.thentype))
                 end
                 if isa(b, SlotNumber) && cnd.slot == slot_id(b)
-                    ty = (cnd.elsetype ⊑ᵢ ty ? cnd.elsetype : tmeet(𝕃ᵢ, ty, widenconst(cnd.elsetype)))
+                    ty = (cnd.elsetype ⊑ ty ? cnd.elsetype : ty ⊓ widenconst(cnd.elsetype))
                 end
-                return tmerge(𝕃ᵢ, tx, ty)
+                return tx ⊔ ty
             end
         end
     end
@@ -1929,26 +2083,39 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
                 return Conditional(aty.slot, thentype, elsetype)
             end
         elseif f === isdefined
-            uty = argtypes[2]
             a = ssa_def_slot(fargs[2], sv)
-            if isa(uty, Union) && isa(a, SlotNumber)
-                fld = argtypes[3]
-                thentype = Bottom
-                elsetype = Bottom
-                for ty in uniontypes(uty)
-                    cnd = isdefined_tfunc(𝕃ᵢ, ty, fld)
-                    if isa(cnd, Const)
-                        if cnd.val::Bool
-                            thentype = tmerge(thentype, ty)
+            if isa(a, SlotNumber)
+                argtype2 = argtypes[2]
+                if isa(argtype2, Union)
+                    fld = argtypes[3]
+                    thentype = Bottom
+                    elsetype = Bottom
+                    for ty in uniontypes(argtype2)
+                        cnd = isdefined_tfunc(𝕃ᵢ, ty, fld)
+                        if isa(cnd, Const)
+                            if cnd.val::Bool
+                                thentype = thentype ⊔ ty
+                            else
+                                elsetype = elsetype ⊔ ty
+                            end
                         else
-                            elsetype = tmerge(elsetype, ty)
+                            thentype = thentype ⊔ ty
+                            elsetype = elsetype ⊔ ty
                         end
-                    else
-                        thentype = tmerge(thentype, ty)
-                        elsetype = tmerge(elsetype, ty)
+                    end
+                    return Conditional(a, thentype, elsetype)
+                else
+                    thentype = form_partially_defined_struct(argtype2, argtypes[3])
+                    if thentype !== nothing
+                        elsetype = argtype2
+                        if rt === Const(false)
+                            thentype = Bottom
+                        elseif rt === Const(true)
+                            elsetype = Bottom
+                        end
+                        return Conditional(a, thentype, elsetype)
                     end
                 end
-                return Conditional(a, thentype, elsetype)
             end
         end
     end
@@ -1956,6 +2123,34 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs
     return rt
 end
 
+function form_partially_defined_struct(@nospecialize(obj), @nospecialize(name))
+    obj isa Const && return nothing # nothing to refine
+    name isa Const || return nothing
+    objt0 = widenconst(obj)
+    objt = unwrap_unionall(objt0)
+    objt isa DataType || return nothing
+    isabstracttype(objt) && return nothing
+    fldidx = try_compute_fieldidx(objt, name.val)
+    fldidx === nothing && return nothing
+    nminfld = datatype_min_ninitialized(objt)
+    if ismutabletype(objt)
+        # A mutable struct can have non-contiguous undefined fields, but `PartialStruct` cannot
+        # model such a state. So here `PartialStruct` can be used to represent only the
+        # objects where the field following the minimum initialized fields is also defined.
+        if fldidx ≠ nminfld+1
+            # if it is already represented as a `PartialStruct`, we can add one more
+            # `isdefined`-field information on top of those implied by its `fields`
+            if !(obj isa PartialStruct && fldidx == length(obj.fields)+1)
+                return nothing
+            end
+        end
+    else
+        fldidx > nminfld || return nothing
+    end
+    return PartialStruct(objt0, Any[obj isa PartialStruct && i≤length(obj.fields) ?
+        obj.fields[i] : fieldtype(objt0,i) for i = 1:fldidx])
+end
+
 function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{Any}, call::CallMeta)
     na = length(argtypes)
     if isvarargtype(argtypes[end])
@@ -1970,8 +2165,8 @@ function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{An
     elseif na == 3
         a2 = argtypes[2]
         a3 = argtypes[3]
-        ⊑ᵢ = ⊑(typeinf_lattice(interp))
-        nothrow = a2 ⊑ᵢ TypeVar && (a3 ⊑ᵢ Type || a3 ⊑ᵢ TypeVar)
+        ⊑ = partialorder(typeinf_lattice(interp))
+        nothrow = a2 ⊑ TypeVar && (a3 ⊑ Type || a3 ⊑ TypeVar)
     else
         return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
     end
@@ -2003,59 +2198,73 @@ function abstract_call_unionall(interp::AbstractInterpreter, argtypes::Vector{An
     return CallMeta(ret, Any, Effects(EFFECTS_TOTAL; nothrow), call.info)
 end
 
-function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, si::StmtInfo, sv::AbsIntState)
+function abstract_invoke(interp::AbstractInterpreter, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState)
+    argtypes = arginfo.argtypes
     ft′ = argtype_by_index(argtypes, 2)
     ft = widenconst(ft′)
-    ft === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    ft === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3), false)
-    isexact || return CallMeta(Any, Any, Effects(), NoCallInfo())
+    isexact || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     unwrapped = unwrap_unionall(types)
-    if types === Bottom || !(unwrapped isa DataType) || unwrapped.name !== Tuple.name
-        return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+    types === Bottom && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+    if !(unwrapped isa DataType && unwrapped.name === Tuple.name)
+        return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
     end
     argtype = argtypes_to_type(argtype_tail(argtypes, 4))
     nargtype = typeintersect(types, argtype)
-    nargtype === Bottom && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-    nargtype isa DataType || return CallMeta(Any, Any, Effects(), NoCallInfo()) # other cases are not implemented below
-    isdispatchelem(ft) || return CallMeta(Any, Any, Effects(), NoCallInfo()) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+    nargtype === Bottom && return Future(CallMeta(Bottom, TypeError, EFFECTS_THROWS, NoCallInfo()))
+    nargtype isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # other cases are not implemented below
+    isdispatchelem(ft) || return Future(CallMeta(Any, Any, Effects(), NoCallInfo())) # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
     ft = ft::DataType
     lookupsig = rewrap_unionall(Tuple{ft, unwrapped.parameters...}, types)::Type
     nargtype = Tuple{ft, nargtype.parameters...}
     argtype = Tuple{ft, argtype.parameters...}
     match, valid_worlds = findsup(lookupsig, method_table(interp))
-    match === nothing && return CallMeta(Any, Any, Effects(), NoCallInfo())
+    match === nothing && return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     update_valid_age!(sv, valid_worlds)
     method = match.method
     tienv = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
-    ti = tienv[1]; env = tienv[2]::SimpleVector
-    result = abstract_call_method(interp, method, ti, env, false, si, sv)
-    (; rt, edge, effects, volatile_inf_result) = result
+    ti = tienv[1]
+    env = tienv[2]::SimpleVector
+    mresult = abstract_call_method(interp, method, ti, env, false, si, sv)::Future
     match = MethodMatch(ti, env, method, argtype <: method.sig)
-    res = nothing
-    sig = match.spec_types
-    argtypes′ = invoke_rewrite(argtypes)
-    fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
-    arginfo = ArgInfo(fargs′, argtypes′)
-    # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
-    # for i in 1:length(argtypes′)
-    #     t, a = ti.parameters[i], argtypes′[i]
-    #     argtypes′[i] = t ⊑ a ? t : a
-    # end
-    𝕃ₚ = ipo_lattice(interp)
-    f = singleton_type(ft′)
-    invokecall = InvokeCall(types, lookupsig)
-    const_call_result = abstract_call_method_with_const_args(interp,
-        result, f, arginfo, si, match, sv, invokecall)
-    const_result = volatile_inf_result
-    if const_call_result !== nothing
-        if ⊑(𝕃ₚ, const_call_result.rt, rt)
-            (; rt, effects, const_result, edge) = const_call_result
+    return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+        (; rt, exct, edge, effects, volatile_inf_result) = result
+        res = nothing
+        sig = match.spec_types
+        argtypes′ = invoke_rewrite(argtypes)
+        fargs = arginfo.fargs
+        fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
+        arginfo = ArgInfo(fargs′, argtypes′)
+        # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
+        # for i in 1:length(argtypes′)
+        #     t, a = ti.parameters[i], argtypes′[i]
+        #     argtypes′[i] = t ⊑ a ? t : a
+        # end
+        𝕃ₚ = ipo_lattice(interp)
+        ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+        f = singleton_type(ft′)
+        invokecall = InvokeCall(types, lookupsig)
+        const_call_result = abstract_call_method_with_const_args(interp,
+            result, f, arginfo, si, match, sv, invokecall)
+        const_result = volatile_inf_result
+        if const_call_result !== nothing
+            if const_call_result.rt ⊑ rt
+                (; rt, effects, const_result, edge) = const_call_result
+            end
+            if const_call_result.exct ⋤ exct
+                (; exct, const_result, edge) = const_call_result
+            end
         end
+        rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
+        info = InvokeCallInfo(match, const_result)
+        edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
+        if !match.fully_covers
+            effects = Effects(effects; nothrow=false)
+            exct = exct ⊔ TypeError
+        end
+        return CallMeta(rt, exct, effects, info)
     end
-    rt = from_interprocedural!(interp, rt, sv, arginfo, sig)
-    info = InvokeCallInfo(match, const_result)
-    edge !== nothing && add_invoke_backedge!(sv, lookupsig, edge)
-    return CallMeta(rt, Any, effects, info)
 end
 
 function invoke_rewrite(xs::Vector{Any})
@@ -2068,28 +2277,42 @@ end
 function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::AbsIntState)
     if length(argtypes) == 3
         finalizer_argvec = Any[argtypes[2], argtypes[3]]
-        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)
-        return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
+        call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)::Future
+        return Future{CallMeta}(call, interp, sv) do call, interp, sv
+            return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
+        end
     end
-    return CallMeta(Nothing, Any, Effects(), NoCallInfo())
+    return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo()))
 end
 
 function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
     na = length(argtypes)
-    𝕃ᵢ = typeinf_lattice(interp)
+    ⊔ = join(typeinf_lattice(interp))
     if na == 2
         argtype2 = argtypes[2]
         if isvarargtype(argtype2)
-            exct = tmerge(𝕃ᵢ, unwrapva(argtype2), ArgumentError)
+            exct = unwrapva(argtype2) ⊔ ArgumentError
         else
             exct = argtype2
         end
     elseif na == 3 && isvarargtype(argtypes[3])
-        exct = tmerge(𝕃ᵢ, argtypes[2], ArgumentError)
+        exct = argtypes[2] ⊔ ArgumentError
     else
         exct = ArgumentError
     end
-    return CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo())
+    return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
+end
+
+function abstract_throw_methoderror(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)
+    exct = if length(argtypes) == 1
+        ArgumentError
+    elseif !isvarargtype(argtypes[2])
+        MethodError
+    else
+        ⊔ = join(typeinf_lattice(interp))
+        MethodError ⊔ ArgumentError
+    end
+    return Future(CallMeta(Union{}, exct, EFFECTS_THROWS, NoCallInfo()))
 end
 
 # call where the function is known exactly
@@ -2112,6 +2335,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             return abstract_applicable(interp, argtypes, sv, max_methods)
         elseif f === throw
             return abstract_throw(interp, argtypes, sv)
+        elseif f === Core.throw_methoderror
+            return abstract_throw_methoderror(interp, argtypes, sv)
         end
         rt = abstract_call_builtin(interp, f, arginfo, sv)
         ft = popfirst!(argtypes)
@@ -2122,68 +2347,81 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             exct = builtin_exct(𝕃ᵢ, f, argtypes, rt)
         end
         pushfirst!(argtypes, ft)
-        return CallMeta(rt, exct, effects, NoCallInfo())
+        refinements = nothing
+        if sv isa InferenceState && f === typeassert
+            # perform very limited back-propagation of invariants after this type assertion
+            if rt !== Bottom && isa(fargs, Vector{Any})
+                farg2 = fargs[2]
+                if farg2 isa SlotNumber
+                    refinements = SlotRefinement(farg2, rt)
+                end
+            end
+        end
+        return Future(CallMeta(rt, exct, effects, NoCallInfo(), refinements))
     elseif isa(f, Core.OpaqueClosure)
         # calling an OpaqueClosure about which we have no information returns no information
-        return CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo())
+        return Future(CallMeta(typeof(f).parameters[2], Any, Effects(), NoCallInfo()))
     elseif f === TypeVar && !isvarargtype(argtypes[end])
         # Manually look through the definition of TypeVar to
         # make sure to be able to get `PartialTypeVar`s out.
-        2 ≤ la ≤ 4 || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-        n = argtypes[2]
-        ub_var = Const(Any)
-        lb_var = Const(Union{})
-        if la == 4
-            ub_var = argtypes[4]
-            lb_var = argtypes[3]
-        elseif la == 3
-            ub_var = argtypes[3]
-        end
+        2 ≤ la ≤ 4 || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
         # make sure generic code is prepared for inlining if needed later
-        call = let T = Any[Type{TypeVar}, Any, Any, Any]
+        let T = Any[Type{TypeVar}, Any, Any, Any]
             resize!(T, la)
             atype = Tuple{T...}
             T[1] = Const(TypeVar)
-            abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)
-        end
-        pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
-        typevar_argtypes = Any[n, lb_var, ub_var]
-        effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
-        if effects.nothrow
-            exct = Union{}
-        else
-            exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+            let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, T), si, atype, sv, max_methods)::Future
+                return Future{CallMeta}(call, interp, sv) do call, interp, sv
+                    n = argtypes[2]
+                    ub_var = Const(Any)
+                    lb_var = Const(Union{})
+                    if la == 4
+                        ub_var = argtypes[4]
+                        lb_var = argtypes[3]
+                    elseif la == 3
+                        ub_var = argtypes[3]
+                    end
+                    pT = typevar_tfunc(𝕃ᵢ, n, lb_var, ub_var)
+                    typevar_argtypes = Any[n, lb_var, ub_var]
+                    effects = builtin_effects(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+                    if effects.nothrow
+                        exct = Union{}
+                    else
+                        exct = builtin_exct(𝕃ᵢ, Core._typevar, typevar_argtypes, pT)
+                    end
+                    return CallMeta(pT, exct, effects, call.info)
+                end
+            end
         end
-        return CallMeta(pT, exct, effects, call.info)
     elseif f === UnionAll
-        call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)
-        return abstract_call_unionall(interp, argtypes, call)
+        let call = abstract_call_gf_by_type(interp, f, ArgInfo(nothing, Any[Const(UnionAll), Any, Any]), si, Tuple{Type{UnionAll}, Any, Any}, sv, max_methods)::Future
+            return Future{CallMeta}(call, interp, sv) do call, interp, sv
+                return abstract_call_unionall(interp, argtypes, call)
+            end
+        end
     elseif f === Tuple && la == 2
         aty = argtypes[2]
         ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
         if !isconcretetype(ty)
-            return CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo())
+            return Future(CallMeta(Tuple, Any, EFFECTS_UNKNOWN, NoCallInfo()))
         end
     elseif is_return_type(f)
         return return_type_tfunc(interp, argtypes, si, sv)
-    elseif la == 2 && istopfunction(f, :!)
-        # handle Conditional propagation through !Bool
-        aty = argtypes[2]
-        if isa(aty, Conditional)
-            call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), si, Tuple{typeof(f), Bool}, sv, max_methods) # make sure we've inferred `!(::Bool)`
-            return CallMeta(Conditional(aty.slot, aty.elsetype, aty.thentype), Any, call.effects, call.info)
-        end
-    elseif la == 3 && istopfunction(f, :!==)
+    elseif la == 3 && f === Core.:(!==)
         # mark !== as exactly a negated call to ===
-        call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)
-        rty = abstract_call_known(interp, (===), arginfo, si, sv, max_methods).rt
-        if isa(rty, Conditional)
-            return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else
-        elseif isa(rty, Const)
-            return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
-        end
-        return call
-    elseif la == 3 && istopfunction(f, :(>:))
+        let callfuture = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Any, Any]), si, Tuple{typeof(f), Any, Any}, sv, max_methods)::Future,
+            rtfuture = abstract_call_known(interp, (===), arginfo, si, sv, max_methods)::Future
+            return Future{CallMeta}(isready(callfuture) && isready(rtfuture), interp, sv) do interp, sv
+                local rty = rtfuture[].rt
+                if isa(rty, Conditional)
+                    return CallMeta(Conditional(rty.slot, rty.elsetype, rty.thentype), Bottom, EFFECTS_TOTAL, NoCallInfo()) # swap if-else
+                elseif isa(rty, Const)
+                    return CallMeta(Const(rty.val === false), Bottom, EFFECTS_TOTAL, MethodResultPure())
+                end
+                return callfuture[]
+            end
+        end
+    elseif la == 3 && f === Core.:(>:)
         # mark issupertype as a exact alias for issubtype
         # swap T1 and T2 arguments and call <:
         if fargs !== nothing && length(fargs) == 3
@@ -2193,47 +2431,59 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         end
         argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
         return abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), si, sv, max_methods)
-    elseif la == 2 && istopfunction(f, :typename)
-        return CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure())
+    elseif la == 2 && f === Core.typename
+        return Future(CallMeta(typename_static(argtypes[2]), Bottom, EFFECTS_TOTAL, MethodResultPure()))
     elseif f === Core._hasmethod
-        return _hasmethod_tfunc(interp, argtypes, sv)
+        return Future(_hasmethod_tfunc(interp, argtypes, sv))
     end
     atype = argtypes_to_type(argtypes)
-    return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)
+    return abstract_call_gf_by_type(interp, f, arginfo, si, atype, sv, max_methods)::Future
 end
 
 function abstract_call_opaque_closure(interp::AbstractInterpreter,
     closure::PartialOpaque, arginfo::ArgInfo, si::StmtInfo, sv::AbsIntState, check::Bool=true)
     sig = argtypes_to_type(arginfo.argtypes)
-    result = abstract_call_method(interp, closure.source::Method, sig, Core.svec(), false, si, sv)
-    (; rt, edge, effects, volatile_inf_result) = result
     tt = closure.typ
-    sigT = (unwrap_unionall(tt)::DataType).parameters[1]
-    match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
-    𝕃ₚ = ipo_lattice(interp)
-    ⊑ₚ = ⊑(𝕃ₚ)
-    const_result = volatile_inf_result
-    if !result.edgecycle
-        const_call_result = abstract_call_method_with_const_args(interp, result,
-            nothing, arginfo, si, match, sv)
-        if const_call_result !== nothing
-            if const_call_result.rt ⊑ₚ rt
-                (; rt, effects, const_result, edge) = const_call_result
+    ocargsig = rewrap_unionall((unwrap_unionall(tt)::DataType).parameters[1], tt)
+    ocargsig′ = unwrap_unionall(ocargsig)
+    ocargsig′ isa DataType || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
+    ocsig = rewrap_unionall(Tuple{Tuple, ocargsig′.parameters...}, ocargsig)
+    hasintersect(sig, ocsig) || return Future(CallMeta(Union{}, Union{MethodError,TypeError}, EFFECTS_THROWS, NoCallInfo()))
+    ocmethod = closure.source::Method
+    match = MethodMatch(sig, Core.svec(), ocmethod, sig <: ocsig)
+    mresult = abstract_call_method(interp, ocmethod, sig, Core.svec(), false, si, sv)
+    ocsig_box = Core.Box(ocsig)
+    return Future{CallMeta}(mresult, interp, sv) do result, interp, sv
+        (; rt, exct, edge, effects, volatile_inf_result, edgecycle) = result
+        𝕃ₚ = ipo_lattice(interp)
+        ⊑, ⋤, ⊔ = partialorder(𝕃ₚ), strictneqpartialorder(𝕃ₚ), join(𝕃ₚ)
+        const_result = volatile_inf_result
+        if !edgecycle
+            const_call_result = abstract_call_method_with_const_args(interp, result,
+                nothing, arginfo, si, match, sv)
+            if const_call_result !== nothing
+                if const_call_result.rt ⊑ rt
+                    (; rt, effects, const_result, edge) = const_call_result
+                end
+                if const_call_result.exct ⋤ exct
+                    (; exct, const_result, edge) = const_call_result
+                end
             end
         end
-    end
-    if check # analyze implicit type asserts on argument and return type
-        ftt = closure.typ
-        (aty, rty) = (unwrap_unionall(ftt)::DataType).parameters
-        rty = rewrap_unionall(rty isa TypeVar ? rty.lb : rty, ftt)
-        if !(rt ⊑ₚ rty && tuple_tfunc(𝕃ₚ, arginfo.argtypes[2:end]) ⊑ₚ rewrap_unionall(aty, ftt))
-            effects = Effects(effects; nothrow=false)
+        if check # analyze implicit type asserts on argument and return type
+            ftt = closure.typ
+            rty = (unwrap_unionall(ftt)::DataType).parameters[2]
+            rty = rewrap_unionall(rty isa TypeVar ? rty.ub : rty, ftt)
+            if !(rt ⊑ rty && sig ⊑ ocsig_box.contents)
+                effects = Effects(effects; nothrow=false)
+                exct = exct ⊔ TypeError
+            end
         end
+        rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
+        info = OpaqueClosureCallInfo(match, const_result)
+        edge !== nothing && add_backedge!(sv, edge)
+        return CallMeta(rt, exct, effects, info)
     end
-    rt = from_interprocedural!(interp, rt, sv, arginfo, match.spec_types)
-    info = OpaqueClosureCallInfo(match, const_result)
-    edge !== nothing && add_backedge!(sv, edge)
-    return CallMeta(rt, Any, effects, info)
 end
 
 function most_general_argtypes(closure::PartialOpaque)
@@ -2258,17 +2508,17 @@ function abstract_call_unknown(interp::AbstractInterpreter, @nospecialize(ft),
     wft = widenconst(ft)
     if hasintersect(wft, Builtin)
         add_remark!(interp, sv, "Could not identify method table for call")
-        return CallMeta(Any, Any, Effects(), NoCallInfo())
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     elseif hasintersect(wft, Core.OpaqueClosure)
         uft = unwrap_unionall(wft)
         if isa(uft, DataType)
-            return CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo())
+            return Future(CallMeta(rewrap_unionall(uft.parameters[2], wft), Any, Effects(), NoCallInfo()))
         end
-        return CallMeta(Any, Any, Effects(), NoCallInfo())
+        return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     end
     # non-constant function, but the number of arguments is known and the `f` is not a builtin or intrinsic
     atype = argtypes_to_type(arginfo.argtypes)
-    return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)
+    return abstract_call_gf_by_type(interp, nothing, arginfo, si, atype, sv, max_methods)::Future
 end
 
 # call where the function is any lattice element
@@ -2339,7 +2589,7 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::U
     # this may be the wrong world for the call,
     # but some of the result is likely to be valid anyways
     # and that may help generate better codegen
-    abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)
+    abstract_call(interp, ArgInfo(nothing, at), StmtInfo(false), sv)::Future
     rt = e.args[1]
     isa(rt, Type) || (rt = Any)
     return RTEffects(rt, Any, EFFECTS_UNKNOWN)
@@ -2380,6 +2630,7 @@ function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, sv::AbsI
         # TODO: We still have non-linearized cglobal
         @assert e.args[1] === Core.tuple || e.args[1] === GlobalRef(Core, :tuple)
     else
+        @assert e.head !== :(=)
         # Some of our tests expect us to handle invalid IR here and error later
         # - permit that for now.
         # @assert false "Unexpected EXPR head in value position"
@@ -2412,10 +2663,14 @@ function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::
 end
 
 struct RTEffects
-    rt
-    exct
+    rt::Any
+    exct::Any
     effects::Effects
-    RTEffects(@nospecialize(rt), @nospecialize(exct), effects::Effects) = new(rt, exct, effects)
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    function RTEffects(rt, exct, effects::Effects, refinements=nothing)
+        @nospecialize rt exct refinements
+        return new(rt, exct, effects, refinements)
+    end
 end
 
 function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::InferenceState)
@@ -2424,8 +2679,13 @@ function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, sv::Infere
         add_curr_ssaflag!(sv, IR_FLAG_UNUSED)
     end
     si = StmtInfo(!unused)
-    call = abstract_call(interp, arginfo, si, sv)
-    sv.stmt_info[sv.currpc] = call.info
+    call = abstract_call(interp, arginfo, si, sv)::Future
+    Future{Nothing}(call, interp, sv) do call, interp, sv
+        # this only is needed for the side-effect, sequenced before any task tries to consume the return value,
+        # which this will do even without returning this Future
+        sv.stmt_info[sv.currpc] = call.info
+        nothing
+    end
     return call
 end
 
@@ -2434,11 +2694,14 @@ function abstract_eval_call(interp::AbstractInterpreter, e::Expr, vtypes::Union{
     ea = e.args
     argtypes = collect_argtypes(interp, ea, vtypes, sv)
     if argtypes === nothing
-        return RTEffects(Bottom, Any, Effects())
+        return Future(RTEffects(Bottom, Any, Effects()))
     end
     arginfo = ArgInfo(ea, argtypes)
-    (; rt, exct, effects) = abstract_call(interp, arginfo, sv)
-    return RTEffects(rt, exct, effects)
+    call = abstract_call(interp, arginfo, sv)::Future
+    return Future{RTEffects}(call, interp, sv) do call, interp, sv
+        (; rt, exct, effects, refinements) = call
+        return RTEffects(rt, exct, effects, refinements)
+    end
 end
 
 function abstract_eval_new(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
@@ -2487,20 +2750,18 @@ function abstract_eval_new(interp::AbstractInterpreter, e::Expr, vtypes::Union{V
                 end
                 ats[i] = at
             end
-            # For now, don't allow:
-            # - Const/PartialStruct of mutables (but still allow PartialStruct of mutables
-            #   with `const` fields if anything refined)
-            # - partially initialized Const/PartialStruct
-            if fcount == nargs
-                if consistent === ALWAYS_TRUE && allconst
-                    argvals = Vector{Any}(undef, nargs)
-                    for j in 1:nargs
-                        argvals[j] = (ats[j]::Const).val
-                    end
-                    rt = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), rt, argvals, nargs))
-                elseif anyrefine
-                    rt = PartialStruct(rt, ats)
+            if fcount == nargs && consistent === ALWAYS_TRUE && allconst
+                argvals = Vector{Any}(undef, nargs)
+                for j in 1:nargs
+                    argvals[j] = (ats[j]::Const).val
                 end
+                rt = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), rt, argvals, nargs))
+            elseif anyrefine || nargs > datatype_min_ninitialized(rt)
+                # propagate partially initialized struct as `PartialStruct` when:
+                # - any refinement information is available (`anyrefine`), or when
+                # - `nargs` is greater than `n_initialized` derived from the struct type
+                #   information alone
+                rt = PartialStruct(rt, ats)
             end
         else
             rt = refine_partial_type(rt)
@@ -2570,12 +2831,15 @@ function abstract_eval_new_opaque_closure(interp::AbstractInterpreter, e::Expr,
                 argtypes = most_general_argtypes(rt)
                 pushfirst!(argtypes, rt.env)
                 callinfo = abstract_call_opaque_closure(interp, rt,
-                    ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)
-                sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+                    ArgInfo(nothing, argtypes), StmtInfo(true), sv, #=check=#false)::Future
+                Future{Nothing}(callinfo, interp, sv) do callinfo, interp, sv
+                    sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+                    nothing
+                end
             end
         end
     end
-    return RTEffects(rt, Any, effects)
+    return Future(RTEffects(rt, Any, effects))
 end
 
 function abstract_eval_copyast(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
@@ -2602,6 +2866,8 @@ function abstract_eval_isdefined(interp::AbstractInterpreter, e::Expr, vtypes::U
             rt = Const(false) # never assigned previously
         elseif !vtyp.undef
             rt = Const(true) # definitely assigned previously
+        else # form `Conditional` to refine `vtyp.undef` in the then branch
+            rt = Conditional(sym, vtyp.typ, vtyp.typ; isdefined=true)
         end
     elseif isa(sym, GlobalRef)
         if InferenceParams(interp).assume_bindings_static
@@ -2669,7 +2935,7 @@ function abstract_eval_static_parameter(::AbstractInterpreter, e::Expr, sv::AbsI
 end
 
 function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtypes::Union{VarTable,Nothing},
-                                      sv::AbsIntState)
+                                      sv::AbsIntState)::Future{RTEffects}
     ehead = e.head
     if ehead === :call
         return abstract_eval_call(interp, e, vtypes, sv)
@@ -2705,8 +2971,10 @@ function abstract_eval_statement_expr(interp::AbstractInterpreter, e::Expr, vtyp
     elseif ehead === :gc_preserve_end || ehead === :leave || ehead === :pop_exception ||
            ehead === :global || ehead === :popaliasscope
         return RTEffects(Nothing, Union{}, Effects(EFFECTS_TOTAL; effect_free=EFFECT_FREE_GLOBALLY))
+    elseif ehead === :globaldecl
+        return RTEffects(Nothing, Any, EFFECTS_UNKNOWN)
     elseif ehead === :thunk
-        return RTEffects(Any, Any, EFFECTS_UNKNOWN)
+        return RTEffects(Any, Any, Effects())
     end
     # N.B.: abstract_eval_value_expr can modify the global effects, but
     # we move out any arguments with effects during SSA construction later
@@ -2765,43 +3033,7 @@ function stmt_taints_inbounds_consistency(sv::AbsIntState)
     return has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
 end
 
-function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
-    if !isa(e, Expr)
-        if isa(e, PhiNode)
-            add_curr_ssaflag!(sv, IR_FLAGS_REMOVABLE)
-            # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
-            # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
-            # over the first and last iterations. By tmerging in the current old_rt, we ensure that
-            # we will not lose an intermediate value.
-            rt = abstract_eval_phi(interp, e, vtypes, sv)
-            old_rt = sv.ssavaluetypes[sv.currpc]
-            rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
-            return RTEffects(rt, Union{}, EFFECTS_TOTAL)
-        end
-        (; rt, exct, effects) = abstract_eval_special_value(interp, e, vtypes, sv)
-    else
-        (; rt, exct, effects) = abstract_eval_statement_expr(interp, e, vtypes, sv)
-        if effects.noub === NOUB_IF_NOINBOUNDS
-            if has_curr_ssaflag(sv, IR_FLAG_INBOUNDS)
-                effects = Effects(effects; noub=ALWAYS_FALSE)
-            elseif !propagate_inbounds(sv)
-                # The callee read our inbounds flag, but unless we propagate inbounds,
-                # we ourselves don't read our parent's inbounds.
-                effects = Effects(effects; noub=ALWAYS_TRUE)
-            end
-        end
-        e = e::Expr
-        @assert !isa(rt, TypeVar) "unhandled TypeVar"
-        rt = maybe_singleton_const(rt)
-        if !isempty(sv.pclimitations)
-            if rt isa Const || rt === Union{}
-                empty!(sv.pclimitations)
-            else
-                rt = LimitedAccuracy(rt, sv.pclimitations)
-                sv.pclimitations = IdSet{InferenceState}()
-            end
-        end
-    end
+function merge_override_effects!(interp::AbstractInterpreter, effects::Effects, sv::InferenceState)
     # N.B.: This only applies to the effects of the statement itself.
     # It is possible for arguments (GlobalRef/:static_parameter) to throw,
     # but these will be recomputed during SSA construction later.
@@ -2809,8 +3041,11 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
     effects = override_effects(effects, override)
     set_curr_ssaflag!(sv, flags_for_effects(effects), IR_FLAGS_EFFECTS)
     merge_effects!(interp, sv, effects)
+    return effects
+end
 
-    return RTEffects(rt, exct, effects)
+function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
+    @assert !isa(e, Union{Expr, PhiNode, NewvarNode})
 end
 
 function override_effects(effects::Effects, override::EffectsOverride)
@@ -2822,8 +3057,9 @@ function override_effects(effects::Effects, override::EffectsOverride)
         notaskstate = override.notaskstate ? true : effects.notaskstate,
         inaccessiblememonly = override.inaccessiblememonly ? ALWAYS_TRUE : effects.inaccessiblememonly,
         noub = override.noub ? ALWAYS_TRUE :
-               (override.noub_if_noinbounds && effects.noub !== ALWAYS_TRUE) ? NOUB_IF_NOINBOUNDS :
-               effects.noub)
+            (override.noub_if_noinbounds && effects.noub !== ALWAYS_TRUE) ? NOUB_IF_NOINBOUNDS :
+            effects.noub,
+        nortcall = override.nortcall ? true : effects.nortcall)
 end
 
 isdefined_globalref(g::GlobalRef) = !iszero(ccall(:jl_globalref_boundp, Cint, (Any,), g))
@@ -3005,15 +3241,16 @@ end
 @nospecializeinfer function widenreturn_partials(𝕃ᵢ::PartialsLattice, @nospecialize(rt), info::BestguessInfo)
     if isa(rt, PartialStruct)
         fields = copy(rt.fields)
-        local anyrefine = false
+        anyrefine = !isvarargtype(rt.fields[end]) &&
+            length(rt.fields) > datatype_min_ninitialized(unwrap_unionall(rt.typ))
         𝕃 = typeinf_lattice(info.interp)
+        ⊏ = strictpartialorder(𝕃)
         for i in 1:length(fields)
             a = fields[i]
             a = isvarargtype(a) ? a : widenreturn_noslotwrapper(𝕃, a, info)
             if !anyrefine
                 # TODO: consider adding && const_prop_profitable(a) here?
-                anyrefine = has_extended_info(a) ||
-                            ⊏(𝕃, a, fieldtype(rt.typ, i))
+                anyrefine = has_extended_info(a) || a ⊏ fieldtype(rt.typ, i)
             end
             fields[i] = a
         end
@@ -3054,55 +3291,6 @@ function handle_control_backedge!(interp::AbstractInterpreter, frame::InferenceS
     return nothing
 end
 
-struct BasicStmtChange
-    changes::Union{Nothing,StateUpdate}
-    rt::Any # extended lattice element or `nothing` - `nothing` if this statement may not be used as an SSA Value
-    exct::Any
-    # TODO effects::Effects
-    BasicStmtChange(changes::Union{Nothing,StateUpdate}, @nospecialize(rt), @nospecialize(exct)) = new(changes, rt, exct)
-end
-
-@inline function abstract_eval_basic_statement(interp::AbstractInterpreter,
-    @nospecialize(stmt), pc_vartable::VarTable, frame::InferenceState)
-    if isa(stmt, NewvarNode)
-        changes = StateUpdate(stmt.slot, VarState(Bottom, true), pc_vartable, false)
-        return BasicStmtChange(changes, nothing, Union{})
-    elseif !isa(stmt, Expr)
-        (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
-        return BasicStmtChange(nothing, rt, exct)
-    end
-    changes = nothing
-    hd = stmt.head
-    if hd === :(=)
-        (; rt, exct) = abstract_eval_statement(interp, stmt.args[2], pc_vartable, frame)
-        if rt === Bottom
-            return BasicStmtChange(nothing, Bottom, exct)
-        end
-        lhs = stmt.args[1]
-        if isa(lhs, SlotNumber)
-            changes = StateUpdate(lhs, VarState(rt, false), pc_vartable, false)
-        elseif isa(lhs, GlobalRef)
-            handle_global_assignment!(interp, frame, lhs, rt)
-        elseif !isa(lhs, SSAValue)
-            merge_effects!(interp, frame, EFFECTS_UNKNOWN)
-        end
-        return BasicStmtChange(changes, rt, exct)
-    elseif hd === :method
-        fname = stmt.args[1]
-        if isa(fname, SlotNumber)
-            changes = StateUpdate(fname, VarState(Any, false), pc_vartable, false)
-        end
-        return BasicStmtChange(changes, nothing, Union{})
-    elseif (hd === :code_coverage_effect || (
-            hd !== :boundscheck && # :boundscheck can be narrowed to Bool
-            is_meta_expr(stmt)))
-        return BasicStmtChange(nothing, Nothing, Bottom)
-    else
-        (; rt, exct) = abstract_eval_statement(interp, stmt, pc_vartable, frame)
-        return BasicStmtChange(nothing, rt, exct)
-    end
-end
-
 function update_bbstate!(𝕃ᵢ::AbstractLattice, frame::InferenceState, bb::Int, vartable::VarTable)
     bbtable = frame.bb_vartables[bb]
     if bbtable === nothing
@@ -3132,7 +3320,7 @@ function update_bestguess!(interp::AbstractInterpreter, frame::InferenceState,
     # narrow representation of bestguess slightly to prepare for tmerge with rt
     if rt isa InterConditional && bestguess isa Const
         slot_id = rt.slot
-        old_id_type = slottypes[slot_id]
+        old_id_type = widenconditional(slottypes[slot_id])
         if bestguess.val === true && rt.elsetype !== Bottom
             bestguess = InterConditional(slot_id, old_id_type, Bottom)
         elseif bestguess.val === false && rt.thentype !== Bottom
@@ -3202,28 +3390,45 @@ function update_cycle_worklists!(callback, frame::InferenceState)
 end
 
 # make as much progress on `frame` as possible (without handling cycles)
-function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
+struct CurrentState
+    result::Future
+    currstate::VarTable
+    bbstart::Int
+    bbend::Int
+    CurrentState(result::Future, currstate::VarTable, bbstart::Int, bbend::Int) = new(result, currstate, bbstart, bbend)
+    CurrentState() = new()
+end
+function typeinf_local(interp::AbstractInterpreter, frame::InferenceState, nextresult::CurrentState)
     @assert !is_inferred(frame)
-    frame.dont_work_on_me = true # mark that this function is currently on the stack
     W = frame.ip
     ssavaluetypes = frame.ssavaluetypes
     bbs = frame.cfg.blocks
     nbbs = length(bbs)
     𝕃ᵢ = typeinf_lattice(interp)
-
+    states = frame.bb_vartables
     currbb = frame.currbb
+    currpc = frame.currpc
+
+    if isdefined(nextresult, :result)
+        # for reasons that are fairly unclear, some state is arbitrarily on the stack instead in the InferenceState as normal
+        bbstart = nextresult.bbstart
+        bbend = nextresult.bbend
+        currstate = nextresult.currstate
+        @goto injectresult
+    end
+
     if currbb != 1
         currbb = frame.currbb = _bits_findnext(W.bits, 1)::Int # next basic block
     end
-
-    states = frame.bb_vartables
     currstate = copy(states[currbb]::VarTable)
     while currbb <= nbbs
         delete!(W, currbb)
         bbstart = first(bbs[currbb].stmts)
         bbend = last(bbs[currbb].stmts)
 
-        for currpc in bbstart:bbend
+        currpc = bbstart - 1
+        while currpc < bbend
+            currpc += 1
             frame.currpc = currpc
             empty_backedges!(frame, currpc)
             stmt = frame.src.code[currpc]
@@ -3240,7 +3445,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                     @goto branch
                 elseif isa(stmt, GotoIfNot)
                     condx = stmt.cond
-                    condxslot = ssa_def_slot(condx, frame)
+                    condslot = ssa_def_slot(condx, frame)
                     condt = abstract_eval_value(interp, condx, currstate, frame)
                     if condt === Bottom
                         ssavaluetypes[currpc] = Bottom
@@ -3248,10 +3453,10 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                         @goto find_next_bb
                     end
                     orig_condt = condt
-                    if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condxslot, SlotNumber)
+                    if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condslot, SlotNumber)
                         # if this non-`Conditional` object is a slot, we form and propagate
                         # the conditional constraint on it
-                        condt = Conditional(condxslot, Const(true), Const(false))
+                        condt = Conditional(condslot, Const(true), Const(false))
                     end
                     condval = maybe_extract_const_bool(condt)
                     nothrow = (condval !== nothing) || ⊑(𝕃ᵢ, orig_condt, Bool)
@@ -3297,21 +3502,31 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                         # We continue with the true branch, but process the false
                         # branch here.
                         if isa(condt, Conditional)
-                            else_change = conditional_change(𝕃ᵢ, currstate, condt.elsetype, condt.slot)
+                            else_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#false)
                             if else_change !== nothing
-                                false_vartable = stoverwrite1!(copy(currstate), else_change)
+                                elsestate = copy(currstate)
+                                stoverwrite1!(elsestate, else_change)
+                            elseif condslot isa SlotNumber
+                                elsestate = copy(currstate)
                             else
-                                false_vartable = currstate
+                                elsestate = currstate
                             end
-                            changed = update_bbstate!(𝕃ᵢ, frame, falsebb, false_vartable)
-                            then_change = conditional_change(𝕃ᵢ, currstate, condt.thentype, condt.slot)
+                            if condslot isa SlotNumber # refine the type of this conditional object itself for this else branch
+                                stoverwrite1!(elsestate, condition_object_change(currstate, condt, condslot, #=then_or_else=#false))
+                            end
+                            else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, elsestate)
+                            then_change = conditional_change(𝕃ᵢ, currstate, condt, #=then_or_else=#true)
+                            thenstate = currstate
                             if then_change !== nothing
-                                stoverwrite1!(currstate, then_change)
+                                stoverwrite1!(thenstate, then_change)
+                            end
+                            if condslot isa SlotNumber # refine the type of this conditional object itself for this then branch
+                                stoverwrite1!(thenstate, condition_object_change(currstate, condt, condslot, #=then_or_else=#true))
                             end
                         else
-                            changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate)
+                            else_changed = update_bbstate!(𝕃ᵢ, frame, falsebb, currstate)
                         end
-                        if changed
+                        if else_changed
                             handle_control_backedge!(interp, frame, currpc, stmt.dest)
                             push!(W, falsebb)
                         end
@@ -3325,14 +3540,14 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                             return caller.ssavaluetypes[caller_pc] !== Any
                         end
                     end
-                    ssavaluetypes[frame.currpc] = Any
+                    ssavaluetypes[currpc] = Any
                     @goto find_next_bb
                 elseif isa(stmt, EnterNode)
                     ssavaluetypes[currpc] = Any
                     add_curr_ssaflag!(frame, IR_FLAG_NOTHROW)
                     if isdefined(stmt, :scope)
                         scopet = abstract_eval_value(interp, stmt.scope, currstate, frame)
-                        handler = gethandler(frame, frame.currpc+1)::TryCatchFrame
+                        handler = gethandler(frame, currpc + 1)::TryCatchFrame
                         @assert handler.scopet !== nothing
                         if !⊑(𝕃ᵢ, scopet, handler.scopet)
                             handler.scopet = tmerge(𝕃ᵢ, scopet, handler.scopet)
@@ -3351,8 +3566,91 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
                 # Fall through terminator - treat as regular stmt
             end
             # Process non control-flow statements
-            (; changes, rt, exct) = abstract_eval_basic_statement(interp,
-                stmt, currstate, frame)
+            @assert isempty(frame.tasks)
+            rt = nothing
+            exct = Bottom
+            changes = nothing
+            refinements = nothing
+            effects = nothing
+            if isa(stmt, NewvarNode)
+                changes = StateUpdate(stmt.slot, VarState(Bottom, true))
+            elseif isa(stmt, PhiNode)
+                add_curr_ssaflag!(frame, IR_FLAGS_REMOVABLE)
+                # Implement convergence for PhiNodes. In particular, PhiNodes need to tmerge over
+                # the incoming values from all iterations, but `abstract_eval_phi` will only tmerge
+                # over the first and last iterations. By tmerging in the current old_rt, we ensure that
+                # we will not lose an intermediate value.
+                rt = abstract_eval_phi(interp, stmt, currstate, frame)
+                old_rt = frame.ssavaluetypes[currpc]
+                rt = old_rt === NOT_FOUND ? rt : tmerge(typeinf_lattice(interp), old_rt, rt)
+            else
+                lhs = nothing
+                if isexpr(stmt, :(=))
+                    lhs = stmt.args[1]
+                    stmt = stmt.args[2]
+                end
+                if !isa(stmt, Expr)
+                    (; rt, exct, effects, refinements) = abstract_eval_special_value(interp, stmt, currstate, frame)
+                else
+                    hd = stmt.head
+                    if hd === :method
+                        fname = stmt.args[1]
+                        if isa(fname, SlotNumber)
+                            changes = StateUpdate(fname, VarState(Any, false))
+                        end
+                    elseif (hd === :code_coverage_effect || (
+                            hd !== :boundscheck && # :boundscheck can be narrowed to Bool
+                            is_meta_expr(stmt)))
+                        rt = Nothing
+                    else
+                        result = abstract_eval_statement_expr(interp, stmt, currstate, frame)::Future
+                        if !isready(result) || !isempty(frame.tasks)
+                            return CurrentState(result, currstate, bbstart, bbend)
+                            @label injectresult
+                            # reload local variables
+                            stmt = frame.src.code[currpc]
+                            changes = nothing
+                            lhs = nothing
+                            if isexpr(stmt, :(=))
+                                lhs = stmt.args[1]
+                                stmt = stmt.args[2]
+                            end
+                            result = nextresult.result::Future{RTEffects}
+                        end
+                        result = result[]
+                        (; rt, exct, effects, refinements) = result
+                        if effects.noub === NOUB_IF_NOINBOUNDS
+                            if has_curr_ssaflag(frame, IR_FLAG_INBOUNDS)
+                                effects = Effects(effects; noub=ALWAYS_FALSE)
+                            elseif !propagate_inbounds(frame)
+                                # The callee read our inbounds flag, but unless we propagate inbounds,
+                                # we ourselves don't read our parent's inbounds.
+                                effects = Effects(effects; noub=ALWAYS_TRUE)
+                            end
+                        end
+                        @assert !isa(rt, TypeVar) "unhandled TypeVar"
+                        rt = maybe_singleton_const(rt)
+                        if !isempty(frame.pclimitations)
+                            if rt isa Const || rt === Union{}
+                                empty!(frame.pclimitations)
+                            else
+                                rt = LimitedAccuracy(rt, frame.pclimitations)
+                                frame.pclimitations = IdSet{InferenceState}()
+                            end
+                        end
+                    end
+                end
+                effects === nothing || merge_override_effects!(interp, effects, frame)
+                if lhs !== nothing && rt !== Bottom
+                    if isa(lhs, SlotNumber)
+                        changes = StateUpdate(lhs, VarState(rt, false))
+                    elseif isa(lhs, GlobalRef)
+                        handle_global_assignment!(interp, frame, lhs, rt)
+                    elseif !isa(lhs, SSAValue)
+                        merge_effects!(interp, frame, EFFECTS_UNKNOWN)
+                    end
+                end
+            end
             if !has_curr_ssaflag(frame, IR_FLAG_NOTHROW)
                 if exct !== Union{}
                     update_exc_bestguess!(interp, exct, frame)
@@ -3372,6 +3670,15 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
             if changes !== nothing
                 stoverwrite1!(currstate, changes)
             end
+            if refinements isa SlotRefinement
+                apply_refinement!(𝕃ᵢ, refinements.slot, refinements.typ, currstate, changes)
+            elseif refinements isa Vector{Any}
+                for i = 1:length(refinements)
+                    newtyp = refinements[i]
+                    newtyp === nothing && continue
+                    apply_refinement!(𝕃ᵢ, SlotNumber(i), newtyp, currstate, changes)
+                end
+            end
             if rt === nothing
                 ssavaluetypes[currpc] = Any
                 continue
@@ -3406,17 +3713,31 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
         end
     end # while currbb <= nbbs
 
-    frame.dont_work_on_me = false
-    nothing
+    return CurrentState()
 end
 
-function conditional_change(𝕃ᵢ::AbstractLattice, state::VarTable, @nospecialize(typ), slot::Int)
-    vtype = state[slot]
+function apply_refinement!(𝕃ᵢ::AbstractLattice, slot::SlotNumber, @nospecialize(newtyp),
+                           currstate::VarTable, currchanges::Union{Nothing,StateUpdate})
+    if currchanges !== nothing && currchanges.var == slot
+        return # type propagation from statement (like assignment) should have the precedence
+    end
+    vtype = currstate[slot_id(slot)]
     oldtyp = vtype.typ
-    if iskindtype(typ)
+    ⊏ = strictpartialorder(𝕃ᵢ)
+    if newtyp ⊏ oldtyp
+        stmtupdate = StateUpdate(slot, VarState(newtyp, vtype.undef))
+        stoverwrite1!(currstate, stmtupdate)
+    end
+end
+
+function conditional_change(𝕃ᵢ::AbstractLattice, currstate::VarTable, condt::Conditional, then_or_else::Bool)
+    vtype = currstate[condt.slot]
+    oldtyp = vtype.typ
+    newtyp = then_or_else ? condt.thentype : condt.elsetype
+    if iskindtype(newtyp)
         # this code path corresponds to the special handling for `isa(x, iskindtype)` check
         # implemented within `abstract_call_builtin`
-    elseif ⊑(𝕃ᵢ, ignorelimited(typ), ignorelimited(oldtyp))
+    elseif ⊑(𝕃ᵢ, ignorelimited(newtyp), ignorelimited(oldtyp))
         # approximate test for `typ ∩ oldtyp` being better than `oldtyp`
         # since we probably formed these types with `typesubstract`,
         # the comparison is likely simple
@@ -3426,30 +3747,98 @@ function conditional_change(𝕃ᵢ::AbstractLattice, state::VarTable, @nospecia
     if oldtyp isa LimitedAccuracy
         # typ is better unlimited, but we may still need to compute the tmeet with the limit
         # "causes" since we ignored those in the comparison
-        typ = tmerge(𝕃ᵢ, typ, LimitedAccuracy(Bottom, oldtyp.causes))
+        newtyp = tmerge(𝕃ᵢ, newtyp, LimitedAccuracy(Bottom, oldtyp.causes))
     end
-    return StateUpdate(SlotNumber(slot), VarState(typ, vtype.undef), state, true)
+    # if this `Conditional` is from from `@isdefined condt.slot`, refine its `undef` information
+    newundef = condt.isdefined ? !then_or_else : vtype.undef
+    return StateUpdate(SlotNumber(condt.slot), VarState(newtyp, newundef), #=conditional=#true)
 end
 
-# make as much progress on `frame` as possible (by handling cycles)
-function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
-    typeinf_local(interp, frame)
+function condition_object_change(currstate::VarTable, condt::Conditional,
+                                 condslot::SlotNumber, then_or_else::Bool)
+    vtype = currstate[slot_id(condslot)]
+    newcondt = Conditional(condt.slot,
+        then_or_else ? condt.thentype : Union{},
+        then_or_else ? Union{} : condt.elsetype)
+    return StateUpdate(condslot, VarState(newcondt, vtype.undef))
+end
 
-    # If the current frame is part of a cycle, solve the cycle before finishing
-    no_active_ips_in_callers = false
-    while !no_active_ips_in_callers
-        no_active_ips_in_callers = true
-        for caller in frame.callers_in_cycle
-            caller.dont_work_on_me && return false # cycle is above us on the stack
-            if !isempty(caller.ip)
-                # Note that `typeinf_local(interp, caller)` can potentially modify the other frames
-                # `frame.callers_in_cycle`, which is why making incremental progress requires the
-                # outer while loop.
-                typeinf_local(interp, caller)
-                no_active_ips_in_callers = false
+# make as much progress on `frame` as possible (by handling cycles)
+warnlength::Int = 2500
+function typeinf(interp::AbstractInterpreter, frame::InferenceState)
+    callstack = frame.callstack::Vector{AbsIntState}
+    nextstates = CurrentState[]
+    takenext = frame.frameid
+    minwarn = warnlength
+    takeprev = 0
+    while takenext >= frame.frameid
+        callee = takenext == 0 ? frame : callstack[takenext]::InferenceState
+        if !isempty(callstack)
+            if length(callstack) - frame.frameid >= minwarn
+                topmethod = callstack[1].linfo
+                topmethod.def isa Method || (topmethod = callstack[2].linfo)
+                print(Core.stderr, "info: inference of ", topmethod, " exceeding ", length(callstack), " frames (may be slow).\n")
+                minwarn *= 2
+            end
+            topcallee = (callstack[end]::InferenceState)
+            if topcallee.cycleid != callee.cycleid
+                callee = topcallee
+                takenext = length(callstack)
+            end
+        end
+        nextstateid = takenext + 1 - frame.frameid
+        while length(nextstates) < nextstateid
+            push!(nextstates, CurrentState())
+        end
+        if doworkloop(interp, callee)
+            # First drain the workloop. Note that since some scheduled work doesn't
+            # affect the result (e.g. cfunction or abstract_call_method on
+            # get_compileable_sig), but still must be finished up since it may see and
+            # change the local variables of the InferenceState at currpc, we do this
+            # even if the nextresult status is already completed.
+            continue
+        elseif isdefined(nextstates[nextstateid], :result) || !isempty(callee.ip)
+            # Next make progress on this frame
+            prev = length(callee.tasks) + 1
+            nextstates[nextstateid] = typeinf_local(interp, callee, nextstates[nextstateid])
+            reverse!(callee.tasks, prev)
+        elseif callee.cycleid == length(callstack)
+            # With no active ip's and no cycles, frame is done
+            finish_nocycle(interp, callee)
+            callee.frameid == 0 && break
+            takenext = length(callstack)
+            nextstateid = takenext + 1 - frame.frameid
+            #@assert length(nextstates) == nextstateid + 1
+            #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+            resize!(nextstates, nextstateid)
+        elseif callee.cycleid == callee.frameid
+            # If the current frame is the top part of a cycle, check if the whole cycle
+            # is done, and if not, pick the next item to work on.
+            no_active_ips_in_cycle = true
+            for i = callee.cycleid:length(callstack)
+                caller = callstack[i]::InferenceState
+                @assert caller.cycleid == callee.cycleid
+                if !isempty(caller.tasks) || isdefined(nextstates[i+1-frame.frameid], :result) || !isempty(caller.ip)
+                    no_active_ips_in_cycle = false
+                    break
+                end
+            end
+            if no_active_ips_in_cycle
+                finish_cycle(interp, callstack, callee.cycleid)
             end
-            update_valid_age!(caller, frame.valid_worlds)
+            takenext = length(callstack)
+            nextstateid = takenext + 1 - frame.frameid
+            if no_active_ips_in_cycle
+                #@assert all(i -> !isdefined(nextstates[i], :result), nextstateid+1:length(nextstates))
+                resize!(nextstates, nextstateid)
+            else
+                #@assert length(nextstates) == nextstateid
+            end
+        else
+            # Continue to the next frame in this cycle
+            takenext = takenext - 1
         end
     end
-    return true
+    #@assert all(nextresult -> !isdefined(nextresult, :result), nextstates)
+    return is_inferred(frame)
 end
diff --git a/base/compiler/abstractlattice.jl b/base/compiler/abstractlattice.jl
index 0102a59667c1e..645c865d085b3 100644
--- a/base/compiler/abstractlattice.jl
+++ b/base/compiler/abstractlattice.jl
@@ -288,9 +288,13 @@ has_extended_unionsplit(::JLTypeLattice) = false
 ⊑(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊑(𝕃, a, b)
 ⊏(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⊏(𝕃, a, b)
 ⋤(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> ⋤(𝕃, a, b)
+tmerge(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmerge(𝕃, a, b)
+tmeet(𝕃::AbstractLattice) = (@nospecialize(a), @nospecialize(b)) -> tmeet(𝕃, a, b)
 partialorder(𝕃::AbstractLattice) = ⊑(𝕃)
 strictpartialorder(𝕃::AbstractLattice) = ⊏(𝕃)
 strictneqpartialorder(𝕃::AbstractLattice) = ⋤(𝕃)
+join(𝕃::AbstractLattice) = tmerge(𝕃)
+meet(𝕃::AbstractLattice) = tmeet(𝕃)
 
 # Fallbacks for external packages using these methods
 const fallback_lattice = InferenceLattice(BaseInferenceLattice.instance)
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 9aecdaad51aa5..5cc01391267d7 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -49,10 +49,11 @@ struct EffectsOverride
     noub::Bool
     noub_if_noinbounds::Bool
     consistent_overlay::Bool
+    nortcall::Bool
 end
 function EffectsOverride(
     override::EffectsOverride =
-        EffectsOverride(false, false, false, false, false, false, false, false, false, false);
+        EffectsOverride(false, false, false, false, false, false, false, false, false, false, false);
     consistent::Bool = override.consistent,
     effect_free::Bool = override.effect_free,
     nothrow::Bool = override.nothrow,
@@ -62,7 +63,8 @@ function EffectsOverride(
     inaccessiblememonly::Bool = override.inaccessiblememonly,
     noub::Bool = override.noub,
     noub_if_noinbounds::Bool = override.noub_if_noinbounds,
-    consistent_overlay::Bool = override.consistent_overlay)
+    consistent_overlay::Bool = override.consistent_overlay,
+    nortcall::Bool = override.nortcall)
     return EffectsOverride(
         consistent,
         effect_free,
@@ -73,9 +75,10 @@ function EffectsOverride(
         inaccessiblememonly,
         noub,
         noub_if_noinbounds,
-        consistent_overlay)
+        consistent_overlay,
+        nortcall)
 end
-const NUM_EFFECTS_OVERRIDES = 10 # sync with julia.h
+const NUM_EFFECTS_OVERRIDES = 11 # sync with julia.h
 
 # essential files and libraries
 include("essentials.jl")
@@ -184,8 +187,7 @@ baremodule BuildSettings
 using Core: ARGS, include
 using Core.Compiler: >, getindex, length
 
-MAX_METHODS::Int = 3
-UNOPTIMIZE_THROW_BLOCKS::Bool = true
+global MAX_METHODS::Int = 3
 
 if length(ARGS) > 2 && ARGS[2] === "--buildsettings"
     include(BuildSettings, ARGS[3])
diff --git a/base/compiler/effects.jl b/base/compiler/effects.jl
index 0375b8dba922c..b22b9396408e3 100644
--- a/base/compiler/effects.jl
+++ b/base/compiler/effects.jl
@@ -47,7 +47,8 @@ following meanings:
   * `ALWAYS_TRUE`: this method is guaranteed to not execute any undefined behavior (for any input).
   * `ALWAYS_FALSE`: this method may execute undefined behavior.
   * `NOUB_IF_NOINBOUNDS`: this method is guaranteed to not execute any undefined behavior
-    if the caller does not set nor propagate the `@inbounds` context.
+    under the assumption that its `@checkbounds` code is not elided (which happens when the
+    caller does not set nor propagate the `@inbounds` context)
   Note that undefined behavior may technically cause the method to violate any other effect
   assertions (such as `:consistent` or `:effect_free`) as well, but we do not model this,
   and they assume the absence of undefined behavior.
@@ -58,6 +59,9 @@ following meanings:
     methods are `:consistent` with their non-overlayed original counterparts
     (see [`Base.@assume_effects`](@ref) for the exact definition of `:consistenct`-cy).
   * `ALWAYS_FALSE`: this method may invoke overlayed methods.
+- `nortcall::Bool`: this method does not call `Core.Compiler.return_type`,
+  and it is guaranteed that any other methods this method might call also do not call
+  `Core.Compiler.return_type`.
 
 Note that the representations above are just internal implementation details and thus likely
 to change in the future. See [`Base.@assume_effects`](@ref) for more detailed explanation
@@ -103,6 +107,9 @@ The output represents the state of different effect properties in the following
     - `+o` (green): `ALWAYS_TRUE`
     - `-o` (red): `ALWAYS_FALSE`
     - `?o` (yellow): `CONSISTENT_OVERLAY`
+9. `:nortcall` (`r`):
+    - `+r` (green): `true`
+    - `-r` (red): `false`
 """
 struct Effects
     consistent::UInt8
@@ -113,6 +120,7 @@ struct Effects
     inaccessiblememonly::UInt8
     noub::UInt8
     nonoverlayed::UInt8
+    nortcall::Bool
     function Effects(
         consistent::UInt8,
         effect_free::UInt8,
@@ -121,7 +129,8 @@ struct Effects
         notaskstate::Bool,
         inaccessiblememonly::UInt8,
         noub::UInt8,
-        nonoverlayed::UInt8)
+        nonoverlayed::UInt8,
+        nortcall::Bool)
         return new(
             consistent,
             effect_free,
@@ -130,7 +139,8 @@ struct Effects
             notaskstate,
             inaccessiblememonly,
             noub,
-            nonoverlayed)
+            nonoverlayed,
+            nortcall)
     end
 end
 
@@ -160,12 +170,12 @@ const NOUB_IF_NOINBOUNDS = 0x01 << 1
 # :nonoverlayed bits
 const CONSISTENT_OVERLAY = 0x01 << 1
 
-const EFFECTS_TOTAL    = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE)
-const EFFECTS_THROWS   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE)
-const EFFECTS_UNKNOWN  = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_TRUE) # unknown mostly, but it's not overlayed at least (e.g. it's not a call)
-const _EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE) # unknown really
+const EFFECTS_TOTAL   = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  true,  true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE, true)
+const EFFECTS_THROWS  = Effects(ALWAYS_TRUE,  ALWAYS_TRUE,  false, true,  true,  ALWAYS_TRUE,  ALWAYS_TRUE,  ALWAYS_TRUE, true)
+const EFFECTS_UNKNOWN = Effects(ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_TRUE, false) # unknown mostly, but it's not overlayed at least (e.g. it's not a call)
 
-function Effects(effects::Effects = _EFFECTS_UNKNOWN;
+function Effects(effects::Effects=Effects(
+    ALWAYS_FALSE, ALWAYS_FALSE, false, false, false, ALWAYS_FALSE, ALWAYS_FALSE, ALWAYS_FALSE, false);
     consistent::UInt8 = effects.consistent,
     effect_free::UInt8 = effects.effect_free,
     nothrow::Bool = effects.nothrow,
@@ -173,7 +183,8 @@ function Effects(effects::Effects = _EFFECTS_UNKNOWN;
     notaskstate::Bool = effects.notaskstate,
     inaccessiblememonly::UInt8 = effects.inaccessiblememonly,
     noub::UInt8 = effects.noub,
-    nonoverlayed::UInt8 = effects.nonoverlayed)
+    nonoverlayed::UInt8 = effects.nonoverlayed,
+    nortcall::Bool = effects.nortcall)
     return Effects(
         consistent,
         effect_free,
@@ -182,7 +193,8 @@ function Effects(effects::Effects = _EFFECTS_UNKNOWN;
         notaskstate,
         inaccessiblememonly,
         noub,
-        nonoverlayed)
+        nonoverlayed,
+        nortcall)
 end
 
 function is_better_effects(new::Effects, old::Effects)
@@ -247,6 +259,11 @@ function is_better_effects(new::Effects, old::Effects)
     elseif new.nonoverlayed != old.nonoverlayed
         return false
     end
+    if new.nortcall
+        any_improved |= !old.nortcall
+    elseif new.nortcall != old.nortcall
+        return false
+    end
     return any_improved
 end
 
@@ -259,7 +276,8 @@ function merge_effects(old::Effects, new::Effects)
         merge_effectbits(old.notaskstate, new.notaskstate),
         merge_effectbits(old.inaccessiblememonly, new.inaccessiblememonly),
         merge_effectbits(old.noub, new.noub),
-        merge_effectbits(old.nonoverlayed, new.nonoverlayed))
+        merge_effectbits(old.nonoverlayed, new.nonoverlayed),
+        merge_effectbits(old.nortcall, new.nortcall))
 end
 
 function merge_effectbits(old::UInt8, new::UInt8)
@@ -279,16 +297,18 @@ is_inaccessiblememonly(effects::Effects) = effects.inaccessiblememonly === ALWAY
 is_noub(effects::Effects)                = effects.noub === ALWAYS_TRUE
 is_noub_if_noinbounds(effects::Effects)  = effects.noub === NOUB_IF_NOINBOUNDS
 is_nonoverlayed(effects::Effects)        = effects.nonoverlayed === ALWAYS_TRUE
+is_nortcall(effects::Effects)            = effects.nortcall
 
 # implies `is_notaskstate` & `is_inaccessiblememonly`, but not explicitly checked here
-is_foldable(effects::Effects) =
+is_foldable(effects::Effects, check_rtcall::Bool=false) =
     is_consistent(effects) &&
     (is_noub(effects) || is_noub_if_noinbounds(effects)) &&
     is_effect_free(effects) &&
-    is_terminates(effects)
+    is_terminates(effects) &&
+    (!check_rtcall || is_nortcall(effects))
 
-is_foldable_nothrow(effects::Effects) =
-    is_foldable(effects) &&
+is_foldable_nothrow(effects::Effects, check_rtcall::Bool=false) =
+    is_foldable(effects, check_rtcall) &&
     is_nothrow(effects)
 
 # TODO add `is_noub` here?
@@ -318,7 +338,8 @@ function encode_effects(e::Effects)
            ((e.notaskstate         % UInt32) << 7)  |
            ((e.inaccessiblememonly % UInt32) << 8)  |
            ((e.noub                % UInt32) << 10) |
-           ((e.nonoverlayed        % UInt32) << 12)
+           ((e.nonoverlayed        % UInt32) << 12) |
+           ((e.nortcall            % UInt32) << 14)
 end
 
 function decode_effects(e::UInt32)
@@ -330,7 +351,8 @@ function decode_effects(e::UInt32)
         _Bool((e >> 7) & 0x01),
         UInt8((e >> 8) & 0x03),
         UInt8((e >> 10) & 0x03),
-        UInt8((e >> 12) & 0x03))
+        UInt8((e >> 12) & 0x03),
+        _Bool((e >> 14) & 0x01))
 end
 
 function encode_effects_override(eo::EffectsOverride)
@@ -345,6 +367,7 @@ function encode_effects_override(eo::EffectsOverride)
     eo.noub                && (e |= (0x0001 << 7))
     eo.noub_if_noinbounds  && (e |= (0x0001 << 8))
     eo.consistent_overlay  && (e |= (0x0001 << 9))
+    eo.nortcall            && (e |= (0x0001 << 10))
     return e
 end
 
@@ -359,7 +382,8 @@ function decode_effects_override(e::UInt16)
         !iszero(e & (0x0001 << 6)),
         !iszero(e & (0x0001 << 7)),
         !iszero(e & (0x0001 << 8)),
-        !iszero(e & (0x0001 << 9)))
+        !iszero(e & (0x0001 << 9)),
+        !iszero(e & (0x0001 << 10)))
 end
 
 decode_statement_effects_override(ssaflag::UInt32) =
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index c358b1177251f..5f8fb82caaa34 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -209,10 +209,10 @@ to enable flow-sensitive analysis.
 """
 const VarTable = Vector{VarState}
 
-const CACHE_MODE_NULL     = 0x00      # not cached, without optimization
-const CACHE_MODE_GLOBAL   = 0x01 << 0 # cached globally, optimization allowed
-const CACHE_MODE_LOCAL    = 0x01 << 1 # cached locally, optimization allowed
-const CACHE_MODE_VOLATILE = 0x01 << 2 # not cached, optimization allowed
+const CACHE_MODE_NULL     = 0x00      # not cached, optimization optional
+const CACHE_MODE_GLOBAL   = 0x01 << 0 # cached globally, optimization required
+const CACHE_MODE_LOCAL    = 0x01 << 1 # cached locally, optimization required
+const CACHE_MODE_VOLATILE = 0x01 << 2 # not cached, optimization required
 
 mutable struct TryCatchFrame
     exct
@@ -236,7 +236,7 @@ mutable struct InferenceState
     slottypes::Vector{Any}
     src::CodeInfo
     cfg::CFG
-    method_info::MethodInfo
+    spec_info::SpecInfo
 
     #= intermediate states for local abstract interpretation =#
     currbb::Int
@@ -251,12 +251,16 @@ mutable struct InferenceState
     stmt_info::Vector{CallInfo}
 
     #= intermediate states for interprocedural abstract interpretation =#
+    tasks::Vector{WorkThunk}
     pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
     limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return
     cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
-    callers_in_cycle::Vector{InferenceState}
-    dont_work_on_me::Bool
-    parent # ::Union{Nothing,AbsIntState}
+
+    # IPO tracking of in-process work, shared with all frames given AbstractInterpreter
+    callstack #::Vector{AbsIntState}
+    parentid::Int # index into callstack of the parent frame that originally added this frame (call frame_parent to extract the current parent of the SCC)
+    frameid::Int # index into callstack at which this object is found (or zero, if this is not a cached frame and has no parent)
+    cycleid::Int # index into the callstack of the topmost frame in the cycle (all frames in the same cycle share the same cycleid)
 
     #= results =#
     result::InferenceResult # remember where to put the result
@@ -290,7 +294,7 @@ mutable struct InferenceState
         sptypes = sptypes_from_meth_instance(mi)
         code = src.code::Vector{Any}
         cfg = compute_basic_blocks(code)
-        method_info = MethodInfo(src)
+        spec_info = SpecInfo(src)
 
         currbb = currpc = 1
         ip = BitSet(1) # TODO BitSetBoundedMinPrioritySet(1)
@@ -312,6 +316,9 @@ mutable struct InferenceState
         nargtypes = length(argtypes)
         for i = 1:nslots
             argtyp = (i > nargtypes) ? Bottom : argtypes[i]
+            if argtyp === Bool && has_conditional(typeinf_lattice(interp))
+                argtyp = Conditional(i, Const(true), Const(false))
+            end
             slottypes[i] = argtyp
             bb_vartable1[i] = VarState(argtyp, i > nargtypes)
         end
@@ -321,9 +328,8 @@ mutable struct InferenceState
         pclimitations = IdSet{InferenceState}()
         limitations = IdSet{InferenceState}()
         cycle_backedges = Vector{Tuple{InferenceState,Int}}()
-        callers_in_cycle = Vector{InferenceState}()
-        dont_work_on_me = false
-        parent = nothing
+        callstack = AbsIntState[]
+        tasks = WorkThunk[]
 
         valid_worlds = WorldRange(1, get_world_counter())
         bestguess = Bottom
@@ -344,18 +350,23 @@ mutable struct InferenceState
 
         restrict_abstract_call_sites = isa(def, Module)
 
-        # some more setups
-        InferenceParams(interp).unoptimize_throw_blocks && mark_throw_blocks!(src, handler_info)
-        !iszero(cache_mode & CACHE_MODE_LOCAL) && push!(get_inference_cache(interp), result)
-
         this = new(
-            mi, world, mod, sptypes, slottypes, src, cfg, method_info,
+            mi, world, mod, sptypes, slottypes, src, cfg, spec_info,
             currbb, currpc, ip, handler_info, ssavalue_uses, bb_vartables, ssavaluetypes, stmt_edges, stmt_info,
-            pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent,
+            tasks, pclimitations, limitations, cycle_backedges, callstack, 0, 0, 0,
             result, unreachable, valid_worlds, bestguess, exc_bestguess, ipo_effects,
             restrict_abstract_call_sites, cache_mode, insert_coverage,
             interp)
 
+        # some more setups
+        if !iszero(cache_mode & CACHE_MODE_LOCAL)
+            push!(get_inference_cache(interp), result)
+        end
+        if !iszero(cache_mode & CACHE_MODE_GLOBAL)
+            push!(callstack, this)
+            this.cycleid = this.frameid = length(callstack)
+        end
+
         # Apply generated function restrictions
         if src.min_world != 1 || src.max_world != typemax(UInt)
             # From generated functions
@@ -515,72 +526,6 @@ function compute_trycatch(code::Vector{Any}, bbs::Union{Vector{BasicBlock},Nothi
     return handler_info
 end
 
-function is_throw_call(e::Expr, code::Vector{Any})
-    if e.head === :call
-        f = e.args[1]
-        if isa(f, SSAValue)
-            f = code[f.id]
-        end
-        if isa(f, GlobalRef)
-            ff = abstract_eval_globalref_type(f)
-            if isa(ff, Const) && ff.val === Core.throw
-                return true
-            end
-        end
-    end
-    return false
-end
-
-function mark_throw_blocks!(src::CodeInfo, handler_info::Union{Nothing,HandlerInfo})
-    for stmt in find_throw_blocks(src.code, handler_info)
-        src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK
-    end
-    return nothing
-end
-
-# this utility function is incomplete and won't catch every block that always throws, since:
-# - it only recognizes direct calls to `throw` within the target code, so it can't mark
-#   blocks that deterministically call `throw` internally, like those containing `error`.
-# - it just does a reverse linear traverse of statements, there's a chance it might miss
-#   blocks, particularly when there are reverse control edges.
-function find_throw_blocks(code::Vector{Any}, handler_info::Union{Nothing,HandlerInfo})
-    stmts = BitSet()
-    n = length(code)
-    for i in n:-1:1
-        s = code[i]
-        if isa(s, Expr)
-            if s.head === :gotoifnot
-                if i+1 in stmts && s.args[2]::Int in stmts
-                    push!(stmts, i)
-                end
-            elseif s.head === :return
-                # see `ReturnNode` handling
-            elseif is_throw_call(s, code)
-                if handler_info === nothing || handler_info.handler_at[i][1] == 0
-                    push!(stmts, i)
-                end
-            elseif i+1 in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, ReturnNode)
-            # NOTE: it potentially makes sense to treat unreachable nodes
-            # (where !isdefined(s, :val)) as `throw` points, but that can cause
-            # worse codegen around the call site (issue #37558)
-        elseif isa(s, GotoNode)
-            if s.label in stmts
-                push!(stmts, i)
-            end
-        elseif isa(s, GotoIfNot)
-            if i+1 in stmts && s.dest in stmts
-                push!(stmts, i)
-            end
-        elseif i+1 in stmts
-            push!(stmts, i)
-        end
-    end
-    return stmts
-end
-
 # check if coverage mode is enabled
 function should_insert_coverage(mod::Module, debuginfo::DebugInfo)
     coverage_enabled(mod) && return true
@@ -833,30 +778,6 @@ function empty_backedges!(frame::InferenceState, currpc::Int=frame.currpc)
     return nothing
 end
 
-function print_callstack(sv::InferenceState)
-    print("=================== Callstack: ==================\n")
-    idx = 0
-    while sv !== nothing
-        print("[")
-        print(idx)
-        if !isa(sv.interp, NativeInterpreter)
-            print(", ")
-            print(typeof(sv.interp))
-        end
-        print("] ")
-        print(sv.linfo)
-        is_cached(sv) || print("  [uncached]")
-        println()
-        for cycle in sv.callers_in_cycle
-            print(' ', cycle.linfo)
-            println()
-        end
-        sv = sv.parent
-        idx += 1
-    end
-    print("================= End callstack ==================\n")
-end
-
 function narguments(sv::InferenceState, include_va::Bool=true)
     nargs = Int(sv.src.nargs)
     if !include_va
@@ -870,7 +791,7 @@ end
 
 # TODO add `result::InferenceResult` and put the irinterp result into the inference cache?
 mutable struct IRInterpretationState
-    const method_info::MethodInfo
+    const spec_info::SpecInfo
     const ir::IRCode
     const mi::MethodInstance
     const world::UInt
@@ -881,11 +802,14 @@ mutable struct IRInterpretationState
     const ssa_refined::BitSet
     const lazyreachability::LazyCFGReachability
     valid_worlds::WorldRange
+    const tasks::Vector{WorkThunk}
     const edges::Vector{Any}
-    parent # ::Union{Nothing,AbsIntState}
+    callstack #::Vector{AbsIntState}
+    frameid::Int
+    parentid::Int
 
     function IRInterpretationState(interp::AbstractInterpreter,
-        method_info::MethodInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any},
+        spec_info::SpecInfo, ir::IRCode, mi::MethodInstance, argtypes::Vector{Any},
         world::UInt, min_world::UInt, max_world::UInt)
         curridx = 1
         given_argtypes = Vector{Any}(undef, length(argtypes))
@@ -904,10 +828,11 @@ mutable struct IRInterpretationState
         ssa_refined = BitSet()
         lazyreachability = LazyCFGReachability(ir)
         valid_worlds = WorldRange(min_world, max_world == typemax(UInt) ? get_world_counter() : max_world)
+        tasks = WorkThunk[]
         edges = Any[]
-        parent = nothing
-        return new(method_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
-                   ssa_refined, lazyreachability, valid_worlds, edges, parent)
+        callstack = AbsIntState[]
+        return new(spec_info, ir, mi, world, curridx, argtypes_refined, ir.sptypes, tpdum,
+                ssa_refined, lazyreachability, valid_worlds, tasks, edges, callstack, 0, 0)
     end
 end
 
@@ -920,10 +845,10 @@ function IRInterpretationState(interp::AbstractInterpreter,
     else
         isa(src, CodeInfo) || return nothing
     end
-    method_info = MethodInfo(src)
+    spec_info = SpecInfo(src)
     ir = inflate_ir(src, mi)
     argtypes = va_process_argtypes(optimizer_lattice(interp), argtypes, src.nargs, src.isva)
-    return IRInterpretationState(interp, method_info, ir, mi, argtypes, world,
+    return IRInterpretationState(interp, spec_info, ir, mi, argtypes, world,
                                  codeinst.min_world, codeinst.max_world)
 end
 
@@ -932,6 +857,29 @@ end
 
 const AbsIntState = Union{InferenceState,IRInterpretationState}
 
+function print_callstack(frame::AbsIntState)
+    print("=================== Callstack: ==================\n")
+    frames = frame.callstack::Vector{AbsIntState}
+    for idx = (frame.frameid == 0 ? 0 : 1):length(frames)
+        sv = (idx == 0 ? frame : frames[idx])
+        idx == frame.frameid && print("*")
+        print("[")
+        print(idx)
+        if sv isa InferenceState && !isa(sv.interp, NativeInterpreter)
+            print(", ")
+            print(typeof(sv.interp))
+        end
+        print("] ")
+        print(frame_instance(sv))
+        is_cached(sv) || print("  [uncached]")
+        sv.parentid == idx - 1 || print(" [parent=", sv.parentid, "]")
+        isempty(callers_in_cycle(sv)) || print(" [cycle=", sv.cycleid, "]")
+        println()
+        @assert sv.frameid == idx
+    end
+    print("================= End callstack ==================\n")
+end
+
 frame_instance(sv::InferenceState) = sv.linfo
 frame_instance(sv::IRInterpretationState) = sv.mi
 
@@ -942,8 +890,32 @@ function frame_module(sv::AbsIntState)
     return def.module
 end
 
-frame_parent(sv::InferenceState) = sv.parent::Union{Nothing,AbsIntState}
-frame_parent(sv::IRInterpretationState) = sv.parent::Union{Nothing,AbsIntState}
+function frame_parent(sv::InferenceState)
+    sv.parentid == 0 && return nothing
+    callstack = sv.callstack::Vector{AbsIntState}
+    sv = callstack[sv.cycleid]::InferenceState
+    sv.parentid == 0 && return nothing
+    return callstack[sv.parentid]
+end
+frame_parent(sv::IRInterpretationState) = sv.parentid == 0 ? nothing : (sv.callstack::Vector{AbsIntState})[sv.parentid]
+
+# add the orphan child to the parent and the parent to the child
+function assign_parentchild!(child::InferenceState, parent::AbsIntState)
+    @assert child.frameid in (0, 1)
+    child.callstack = callstack = parent.callstack::Vector{AbsIntState}
+    child.parentid = parent.frameid
+    push!(callstack, child)
+    child.cycleid = child.frameid = length(callstack)
+    nothing
+end
+function assign_parentchild!(child::IRInterpretationState, parent::AbsIntState)
+    @assert child.frameid in (0, 1)
+    child.callstack = callstack = parent.callstack::Vector{AbsIntState}
+    child.parentid = parent.frameid
+    push!(callstack, child)
+    child.frameid = length(callstack)
+    nothing
+end
 
 function is_constproped(sv::InferenceState)
     (;overridden_by_const) = sv.result
@@ -954,18 +926,15 @@ is_constproped(::IRInterpretationState) = true
 is_cached(sv::InferenceState) = !iszero(sv.cache_mode & CACHE_MODE_GLOBAL)
 is_cached(::IRInterpretationState) = false
 
-method_info(sv::InferenceState) = sv.method_info
-method_info(sv::IRInterpretationState) = sv.method_info
+spec_info(sv::InferenceState) = sv.spec_info
+spec_info(sv::IRInterpretationState) = sv.spec_info
 
-propagate_inbounds(sv::AbsIntState) = method_info(sv).propagate_inbounds
-method_for_inference_limit_heuristics(sv::AbsIntState) = method_info(sv).method_for_inference_limit_heuristics
+propagate_inbounds(sv::AbsIntState) = spec_info(sv).propagate_inbounds
+method_for_inference_limit_heuristics(sv::AbsIntState) = spec_info(sv).method_for_inference_limit_heuristics
 
 frame_world(sv::InferenceState) = sv.world
 frame_world(sv::IRInterpretationState) = sv.world
 
-callers_in_cycle(sv::InferenceState) = sv.callers_in_cycle
-callers_in_cycle(sv::IRInterpretationState) = ()
-
 function is_effect_overridden(sv::AbsIntState, effect::Symbol)
     if is_effect_overridden(frame_instance(sv), effect)
         return true
@@ -1002,20 +971,42 @@ Note that cycles may be visited in any order.
 struct AbsIntStackUnwind
     sv::AbsIntState
 end
-iterate(unw::AbsIntStackUnwind) = (unw.sv, (unw.sv, 0))
-function iterate(unw::AbsIntStackUnwind, (sv, cyclei)::Tuple{AbsIntState, Int})
-    # iterate through the cycle before walking to the parent
-    callers = callers_in_cycle(sv)
-    if callers !== () && cyclei < length(callers)
-        cyclei += 1
-        parent = callers[cyclei]
-    else
-        cyclei = 0
-        parent = frame_parent(sv)
+iterate(unw::AbsIntStackUnwind) = (unw.sv, length(unw.sv.callstack::Vector{AbsIntState}))
+function iterate(unw::AbsIntStackUnwind, frame::Int)
+    frame == 0 && return nothing
+    return ((unw.sv.callstack::Vector{AbsIntState})[frame], frame - 1)
+end
+
+struct AbsIntCycle
+    frames::Vector{AbsIntState}
+    cycleid::Int
+    cycletop::Int
+end
+iterate(unw::AbsIntCycle) = unw.cycleid == 0 ? nothing : (unw.frames[unw.cycletop], unw.cycletop)
+function iterate(unw::AbsIntCycle, frame::Int)
+    frame == unw.cycleid && return nothing
+    return (unw.frames[frame - 1], frame - 1)
+end
+
+"""
+    callers_in_cycle(sv::AbsIntState)
+
+Iterate through all callers of the given `AbsIntState` in the abstract
+interpretation stack (including the given `AbsIntState` itself) that are part
+of the same cycle, only if it is part of a cycle with multiple frames.
+"""
+function callers_in_cycle(sv::InferenceState)
+    callstack = sv.callstack::Vector{AbsIntState}
+    cycletop = cycleid = sv.cycleid
+    while cycletop < length(callstack)
+        frame = callstack[cycletop + 1]
+        frame isa InferenceState || break
+        frame.cycleid == cycleid || break
+        cycletop += 1
     end
-    parent === nothing && return nothing
-    return (parent, (parent, cyclei))
+    return AbsIntCycle(callstack, cycletop == cycleid ? 0 : cycleid, cycletop)
 end
+callers_in_cycle(sv::IRInterpretationState) = AbsIntCycle(sv.callstack::Vector{AbsIntState}, 0, 0)
 
 # temporarily accumulate our edges to later add as backedges in the callee
 function add_backedge!(caller::InferenceState, mi::MethodInstance)
@@ -1070,6 +1061,7 @@ function merge_effects!(::AbstractInterpreter, caller::InferenceState, effects::
         effects = Effects(effects; effect_free=ALWAYS_TRUE)
     end
     caller.ipo_effects = merge_effects(caller.ipo_effects, effects)
+    nothing
 end
 merge_effects!(::AbstractInterpreter, ::IRInterpretationState, ::Effects) = return
 
@@ -1099,30 +1091,6 @@ bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::InferenceStat
 bail_out_apply(::AbstractInterpreter, state::InferenceLoopState, ::IRInterpretationState) =
     state.rt === Any
 
-function should_infer_this_call(interp::AbstractInterpreter, sv::InferenceState)
-    if InferenceParams(interp).unoptimize_throw_blocks
-        # Disable inference of calls in throw blocks, since we're unlikely to
-        # need their types. There is one exception however: If up until now, the
-        # function has not seen any side effects, we would like to make sure there
-        # aren't any in the throw block either to enable other optimizations.
-        if is_stmt_throw_block(get_curr_ssaflag(sv))
-            should_infer_for_effects(sv) || return false
-        end
-    end
-    return true
-end
-function should_infer_for_effects(sv::InferenceState)
-    def = sv.linfo.def
-    def isa Method || return false # toplevel frame will not be [semi-]concrete-evaluated
-    effects = sv.ipo_effects
-    override = decode_effects_override(def.purity)
-    effects.consistent === ALWAYS_FALSE && !is_effect_overridden(override, :consistent) && return false
-    effects.effect_free === ALWAYS_FALSE && !is_effect_overridden(override, :effect_free) && return false
-    !effects.terminates && !is_effect_overridden(override, :terminates_globally) && return false
-    return true
-end
-should_infer_this_call(::AbstractInterpreter, ::IRInterpretationState) = true
-
 add_remark!(::AbstractInterpreter, ::InferenceState, remark) = return
 add_remark!(::AbstractInterpreter, ::IRInterpretationState, remark) = return
 
@@ -1156,3 +1124,90 @@ function get_max_methods_for_module(mod::Module)
     max_methods < 0 && return nothing
     return max_methods
 end
+
+"""
+    Future{T}
+
+Delayed return value for a value of type `T`, similar to RefValue{T}, but
+explicitly represents completed as a `Bool` rather than as `isdefined`.
+Set once with `f[] = v` and accessed with `f[]` afterwards.
+
+Can also be constructed with the `completed` flag value and a closure to
+produce `x`, as well as the additional arguments to avoid always capturing the
+same couple of values.
+"""
+struct Future{T}
+    later::Union{Nothing,RefValue{T}}
+    now::Union{Nothing,T}
+    Future{T}() where {T} = new{T}(RefValue{T}(), nothing)
+    Future{T}(x) where {T} = new{T}(nothing, x)
+    Future(x::T) where {T} = new{T}(nothing, x)
+end
+isready(f::Future) = f.later === nothing
+getindex(f::Future{T}) where {T} = (later = f.later; later === nothing ? f.now::T : later[])
+setindex!(f::Future, v) = something(f.later)[] = v
+convert(::Type{Future{T}}, x) where {T} = Future{T}(x) # support return type conversion
+convert(::Type{Future{T}}, x::Future) where {T} = x::Future{T}
+function Future{T}(f, immediate::Bool, interp::AbstractInterpreter, sv::AbsIntState) where {T}
+    if immediate
+        return Future{T}(f(interp, sv))
+    else
+        @assert applicable(f, interp, sv)
+        result = Future{T}()
+        push!(sv.tasks, function (interp, sv)
+            result[] = f(interp, sv)
+            return true
+        end)
+        return result
+    end
+end
+function Future{T}(f, prev::Future{S}, interp::AbstractInterpreter, sv::AbsIntState) where {T, S}
+    later = prev.later
+    if later === nothing
+        return Future{T}(f(prev[], interp, sv))
+    else
+        @assert Core._hasmethod(Tuple{Core.Typeof(f), S, typeof(interp), typeof(sv)})
+        result = Future{T}()
+        push!(sv.tasks, function (interp, sv)
+            result[] = f(later[], interp, sv) # capture just later, instead of all of prev
+            return true
+        end)
+        return result
+    end
+end
+
+
+"""
+    doworkloop(args...)
+
+Run a tasks inside the abstract interpreter, returning false if there are none.
+Tasks will be run in DFS post-order tree order, such that all child tasks will
+be run in the order scheduled, prior to running any subsequent tasks. This
+allows tasks to generate more child tasks, which will be run before anything else.
+Each task will be run repeatedly when returning `false`, until it returns `true`.
+"""
+function doworkloop(interp::AbstractInterpreter, sv::AbsIntState)
+    tasks = sv.tasks
+    prev = length(tasks)
+    prev == 0 && return false
+    task = pop!(tasks)
+    completed = task(interp, sv)
+    tasks = sv.tasks # allow dropping gc root over the previous call
+    completed isa Bool || throw(TypeError(:return, "", Bool, task)) # print the task on failure as part of the error message, instead of just "@ workloop:line"
+    completed || push!(tasks, task)
+    # efficient post-order visitor: items pushed are executed in reverse post order such
+    # that later items are executed before earlier ones, but are fully executed
+    # (including any dependencies scheduled by them) before going on to the next item
+    reverse!(tasks, #=start=#prev)
+    return true
+end
+
+
+#macro workthunk(name::Symbol, body)
+#    name = esc(name)
+#    body = esc(body)
+#    return replace_linenums!(
+#        :(function $name($(esc(interp)), $(esc(sv)))
+#              $body
+#          end), __source__)
+#end
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 85d4a92b3919a..02f6b46e2e73f 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -23,34 +23,35 @@ const IR_FLAG_INBOUNDS    = one(UInt32) << 0
 const IR_FLAG_INLINE      = one(UInt32) << 1
 # This statement is marked as @noinline by user
 const IR_FLAG_NOINLINE    = one(UInt32) << 2
-# This statement is on a code path that eventually `throw`s.
-const IR_FLAG_THROW_BLOCK = one(UInt32) << 3
 # An optimization pass has updated this statement in a way that may
 # have exposed information that inference did not see. Re-running
 # inference on this statement may be profitable.
-const IR_FLAG_REFINED     = one(UInt32) << 4
+const IR_FLAG_REFINED     = one(UInt32) << 3
 # This statement is proven :consistent
-const IR_FLAG_CONSISTENT  = one(UInt32) << 5
+const IR_FLAG_CONSISTENT  = one(UInt32) << 4
 # This statement is proven :effect_free
-const IR_FLAG_EFFECT_FREE = one(UInt32) << 6
+const IR_FLAG_EFFECT_FREE = one(UInt32) << 5
 # This statement is proven :nothrow
-const IR_FLAG_NOTHROW     = one(UInt32) << 7
+const IR_FLAG_NOTHROW     = one(UInt32) << 6
 # This statement is proven :terminates
-const IR_FLAG_TERMINATES  = one(UInt32) << 8
+const IR_FLAG_TERMINATES  = one(UInt32) << 7
 # This statement is proven :noub
-const IR_FLAG_NOUB        = one(UInt32) << 9
+const IR_FLAG_NOUB        = one(UInt32) << 8
 # TODO: Both of these should eventually go away once
 # This statement is :effect_free == EFFECT_FREE_IF_INACCESSIBLEMEMONLY
-const IR_FLAG_EFIIMO      = one(UInt32) << 10
+const IR_FLAG_EFIIMO      = one(UInt32) << 9
 # This statement is :inaccessiblememonly == INACCESSIBLEMEM_OR_ARGMEMONLY
-const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 11
+const IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM = one(UInt32) << 10
+# This statement is :nortcall
+const IR_FLAG_NORTCALL    = one(UInt32) << 11
 # This statement has no users and may be deleted if flags get refined to IR_FLAGS_REMOVABLE
 const IR_FLAG_UNUSED      = one(UInt32) << 12
 
 const NUM_IR_FLAGS = 13 # sync with julia.h
 
 const IR_FLAGS_EFFECTS =
-    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES | IR_FLAG_NOUB
+    IR_FLAG_CONSISTENT | IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW |
+    IR_FLAG_TERMINATES | IR_FLAG_NOUB | IR_FLAG_NORTCALL
 
 const IR_FLAGS_REMOVABLE = IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW | IR_FLAG_TERMINATES
 
@@ -58,6 +59,12 @@ const IR_FLAGS_NEEDS_EA = IR_FLAG_EFIIMO | IR_FLAG_INACCESSIBLEMEM_OR_ARGMEM
 
 has_flag(curr::UInt32, flag::UInt32) = (curr & flag) == flag
 
+function iscallstmt(@nospecialize stmt)
+    stmt isa Expr || return false
+    head = stmt.head
+    return head === :call || head === :invoke || head === :foreigncall
+end
+
 function flags_for_effects(effects::Effects)
     flags = zero(UInt32)
     if is_consistent(effects)
@@ -80,6 +87,9 @@ function flags_for_effects(effects::Effects)
     if is_noub(effects)
         flags |= IR_FLAG_NOUB
     end
+    if is_nortcall(effects)
+        flags |= IR_FLAG_NORTCALL
+    end
     return flags
 end
 
@@ -249,9 +259,8 @@ end
 
 _topmod(sv::OptimizationState) = _topmod(sv.mod)
 
-is_stmt_inline(stmt_flag::UInt32)      = has_flag(stmt_flag, IR_FLAG_INLINE)
-is_stmt_noinline(stmt_flag::UInt32)    = has_flag(stmt_flag, IR_FLAG_NOINLINE)
-is_stmt_throw_block(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_THROW_BLOCK)
+is_stmt_inline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_INLINE)
+is_stmt_noinline(stmt_flag::UInt32) = has_flag(stmt_flag, IR_FLAG_NOINLINE)
 
 function new_expr_effect_flags(𝕃ₒ::AbstractLattice, args::Vector{Any}, src::Union{IRCode,IncrementalCompact}, pattern_match=nothing)
     Targ = args[1]
@@ -377,7 +386,7 @@ function recompute_effects_flags(𝕃ₒ::AbstractLattice, @nospecialize(stmt),
     elseif nothrow
         flag |= IR_FLAG_NOTHROW
     end
-    if !(isexpr(stmt, :call) || isexpr(stmt, :invoke))
+    if !iscallstmt(stmt)
         # There is a bit of a subtle point here, which is that some non-call
         # statements (e.g. PiNode) can be UB:, however, we consider it
         # illegal to introduce such statements that actually cause UB (for any
@@ -586,26 +595,28 @@ mutable struct PostOptAnalysisState
     all_nothrow::Bool
     all_noub::Bool
     any_conditional_ub::Bool
+    nortcall::Bool
     function PostOptAnalysisState(result::InferenceResult, ir::IRCode)
         inconsistent = BitSetBoundedMinPrioritySet(length(ir.stmts))
         tpdum = TwoPhaseDefUseMap(length(ir.stmts))
         lazypostdomtree = LazyPostDomtree(ir)
         lazyagdomtree = LazyAugmentedDomtree(ir)
         return new(result, ir, inconsistent, tpdum, lazypostdomtree, lazyagdomtree, Int[],
-                   true, true, nothing, true, true, false)
+                   true, true, nothing, true, true, false, true)
     end
 end
 
 give_up_refinements!(sv::PostOptAnalysisState) =
     sv.all_retpaths_consistent = sv.all_effect_free = sv.effect_free_if_argmem_only =
-    sv.all_nothrow = sv.all_noub = false
+    sv.all_nothrow = sv.all_noub = sv.nortcall = false
 
 function any_refinable(sv::PostOptAnalysisState)
     effects = sv.result.ipo_effects
     return ((!is_consistent(effects) & sv.all_retpaths_consistent) |
             (!is_effect_free(effects) & sv.all_effect_free) |
             (!is_nothrow(effects) & sv.all_nothrow) |
-            (!is_noub(effects) & sv.all_noub))
+            (!is_noub(effects) & sv.all_noub) |
+            (!is_nortcall(effects) & sv.nortcall))
 end
 
 struct GetNativeEscapeCache{CodeCache}
@@ -633,10 +644,10 @@ function ((; code_cache)::GetNativeEscapeCache)(mi::MethodInstance)
     return false
 end
 
-function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState)
+function refine_effects!(interp::AbstractInterpreter, opt::OptimizationState, sv::PostOptAnalysisState)
     if !is_effect_free(sv.result.ipo_effects) && sv.all_effect_free && !isempty(sv.ea_analysis_pending)
         ir = sv.ir
-        nargs = length(ir.argtypes)
+        nargs = Int(opt.src.nargs)
         estate = EscapeAnalysis.analyze_escapes(ir, nargs, optimizer_lattice(interp), GetNativeEscapeCache(interp))
         argescapes = EscapeAnalysis.ArgEscapeCache(estate)
         stack_analysis_result!(sv.result, argescapes)
@@ -650,7 +661,8 @@ function refine_effects!(interp::AbstractInterpreter, sv::PostOptAnalysisState)
         effect_free = sv.all_effect_free ? ALWAYS_TRUE :
                       sv.effect_free_if_argmem_only === true ? EFFECT_FREE_IF_INACCESSIBLEMEMONLY : effects.effect_free,
         nothrow = sv.all_nothrow ? true : effects.nothrow,
-        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub)
+        noub = sv.all_noub ? (sv.any_conditional_ub ? NOUB_IF_NOINBOUNDS : ALWAYS_TRUE) : effects.noub,
+        nortcall = sv.nortcall ? true : effects.nortcall)
     return true
 end
 
@@ -690,6 +702,8 @@ function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospeci
     else
         return false
     end
+    has_no_escape(x::EscapeAnalysis.EscapeInfo) =
+        EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(x))
     for i = startidx:length(stmt.args)
         arg = stmt.args[i]
         argt = argextype(arg, ir)
@@ -698,7 +712,7 @@ function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospeci
         end
         # See if we can find the allocation
         if isa(arg, Argument)
-            if EscapeAnalysis.has_no_escape(EscapeAnalysis.ignore_argescape(estate[arg]))
+            if has_no_escape(estate[arg])
                 # Even if we prove everything else effect_free, the best we can
                 # say is :effect_free_if_argmem_only
                 if sv.effect_free_if_argmem_only === nothing
@@ -709,7 +723,7 @@ function check_all_args_noescape!(sv::PostOptAnalysisState, ir::IRCode, @nospeci
             end
             return false
         elseif isa(arg, SSAValue)
-            EscapeAnalysis.has_no_escape(estate[arg]) || return false
+            has_no_escape(estate[arg]) || return false
             check_all_args_noescape!(sv, ir, ir[arg][:stmt], estate) || return false
         else
             return false
@@ -775,6 +789,13 @@ function scan_non_dataflow_flags!(inst::Instruction, sv::PostOptAnalysisState)
             sv.all_noub = false
         end
     end
+    if !has_flag(flag, IR_FLAG_NORTCALL)
+        # if a function call that might invoke `Core.Compiler.return_type` has been deleted,
+        # there's no need to taint with `:nortcall`, allowing concrete evaluation
+        if iscallstmt(stmt)
+            sv.nortcall = false
+        end
+    end
 end
 
 function scan_inconsistency!(inst::Instruction, sv::PostOptAnalysisState)
@@ -918,7 +939,8 @@ function check_inconsistentcy!(sv::PostOptAnalysisState, scanner::BBScanner)
     end
 end
 
-function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result::InferenceResult)
+function ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
+                                ir::IRCode, result::InferenceResult)
     if !is_ipo_dataflow_analysis_profitable(result.ipo_effects)
         return false
     end
@@ -946,13 +968,13 @@ function ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, result:
         end
     end
 
-    return refine_effects!(interp, sv)
+    return refine_effects!(interp, opt, sv)
 end
 
 # run the optimization work
 function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
-    @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt, caller)
-    ipo_dataflow_analysis!(interp, ir, caller)
+    @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt)
+    ipo_dataflow_analysis!(interp, opt, ir, caller)
     return finish(interp, opt, ir, caller)
 end
 
@@ -974,7 +996,6 @@ matchpass(::Nothing, _, _) = false
 function run_passes_ipo_safe(
     ci::CodeInfo,
     sv::OptimizationState,
-    caller::InferenceResult,
     optimize_until = nothing,  # run all passes by default
 )
     __stage__ = 0  # used by @pass
@@ -1272,7 +1293,7 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
 isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
 
 function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{VarState},
-                        params::OptimizationParams, error_path::Bool = false)
+                        params::OptimizationParams)
     #=const=# UNKNOWN_CALL_COST = 20
     head = ex.head
     if is_meta_expr_head(head)
@@ -1333,10 +1354,10 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
                 return 0
             elseif (f === Core.memoryrefget || f === Core.memoryref_isassigned) && length(ex.args) >= 3
                 atyp = argextype(ex.args[2], src, sptypes)
-                return isknowntype(atyp) ? 1 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+                return isknowntype(atyp) ? 1 : params.inline_nonleaf_penalty
             elseif f === Core.memoryrefset! && length(ex.args) >= 3
                 atyp = argextype(ex.args[2], src, sptypes)
-                return isknowntype(atyp) ? 5 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+                return isknowntype(atyp) ? 5 : params.inline_nonleaf_penalty
             elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
                 return 1
             end
@@ -1352,7 +1373,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
         if extyp === Union{}
             return 0
         end
-        return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+        return params.inline_nonleaf_penalty
     elseif head === :foreigncall
         foreigncall = ex.args[1]
         if foreigncall isa QuoteNode && foreigncall.value === :jl_string_ptr
@@ -1375,7 +1396,7 @@ function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptyp
         end
         a = ex.args[2]
         if a isa Expr
-            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params, error_path))
+            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, params))
         end
         return cost
     elseif head === :copyast
@@ -1389,8 +1410,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod
     thiscost = 0
     dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
     if stmt isa Expr
-        thiscost = statement_cost(stmt, line, src, sptypes, params,
-                                  is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
+        thiscost = statement_cost(stmt, line, src, sptypes, params)::Int
     elseif stmt isa GotoNode
         # loops are generally always expensive
         # but assume that forward jumps are already counted for from
diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
index f74cb90e6ab51..a0abacb617085 100644
--- a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
+++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
@@ -24,10 +24,10 @@ using ._TOP_MOD:     # Base definitions
     isempty, ismutabletype, keys, last, length, max, min, missing, pop!, push!, pushfirst!,
     unwrap_unionall, !, !=, !==, &, *, +, -, :, <, <<, =>, >, |, ∈, ∉, ∩, ∪, ≠, ≤, ≥, ⊆
 using Core.Compiler: # Core.Compiler specific definitions
-    Bottom, IRCode, IR_FLAG_NOTHROW, InferenceResult, SimpleInferenceLattice,
+    AbstractLattice, Bottom, IRCode, IR_FLAG_NOTHROW, InferenceResult, SimpleInferenceLattice,
     argextype, fieldcount_noerror, hasintersect, has_flag, intrinsic_nothrow,
-    is_meta_expr_head, isbitstype, isexpr, println, setfield!_nothrow, singleton_type,
-    try_compute_field, try_compute_fieldidx, widenconst, ⊑, AbstractLattice
+    is_meta_expr_head, is_mutation_free_argtype, isexpr, println, setfield!_nothrow,
+    singleton_type, try_compute_field, try_compute_fieldidx, widenconst, ⊑
 
 include(x) = _TOP_MOD.include(@__MODULE__, x)
 if _TOP_MOD === Core.Compiler
@@ -732,11 +732,13 @@ function compute_frameinfo(ir::IRCode)
         inst = ir[SSAValue(idx)]
         stmt = inst[:stmt]
         if isa(stmt, EnterNode)
-            @assert idx ≤ nstmts "try/catch inside new_nodes unsupported"
-            tryregions === nothing && (tryregions = UnitRange{Int}[])
             leave_block = stmt.catch_dest
-            leave_pc = first(ir.cfg.blocks[leave_block].stmts)
-            push!(tryregions, idx:leave_pc)
+            if leave_block ≠ 0
+                @assert idx ≤ nstmts "try/catch inside new_nodes unsupported"
+                tryregions === nothing && (tryregions = UnitRange{Int}[])
+                leave_pc = first(ir.cfg.blocks[leave_block].stmts)
+                push!(tryregions, idx:leave_pc)
+            end
         elseif arrayinfo !== nothing
             # TODO this super limited alias analysis is able to handle only very simple cases
             # this should be replaced with a proper forward dimension analysis
@@ -857,7 +859,7 @@ function add_escape_change!(astate::AnalysisState, @nospecialize(x), xinfo::Esca
     xinfo === ⊥ && return nothing # performance optimization
     xidx = iridx(x, astate.estate)
     if xidx !== nothing
-        if force || !isbitstype(widenconst(argextype(x, astate.ir)))
+        if force || !is_mutation_free_argtype(argextype(x, astate.ir))
             push!(astate.changes, EscapeChange(xidx, xinfo))
         end
     end
@@ -867,7 +869,7 @@ end
 function add_liveness_change!(astate::AnalysisState, @nospecialize(x), livepc::Int)
     xidx = iridx(x, astate.estate)
     if xidx !== nothing
-        if !isbitstype(widenconst(argextype(x, astate.ir)))
+        if !is_mutation_free_argtype(argextype(x, astate.ir))
             push!(astate.changes, LivenessChange(xidx, livepc))
         end
     end
@@ -1213,6 +1215,7 @@ escape_builtin!(::typeof(Core.donotdelete), _...) = false
 # not really safe, but `ThrownEscape` will be imposed later
 escape_builtin!(::typeof(isdefined), _...) = false
 escape_builtin!(::typeof(throw), _...) = false
+escape_builtin!(::typeof(Core.throw_methoderror), _...) = false
 
 function escape_builtin!(::typeof(ifelse), astate::AnalysisState, pc::Int, args::Vector{Any})
     length(args) == 4 || return false
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index a77a67ab262de..5017b619469ff 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -12,6 +12,8 @@ struct InliningTodo
     mi::MethodInstance
     # The IR of the inlinee
     ir::IRCode
+    # The SpecInfo for the inlinee
+    spec_info::SpecInfo
     # The DebugInfo table for the inlinee
     di::DebugInfo
     # If the function being inlined is a single basic block we can use a
@@ -20,8 +22,8 @@ struct InliningTodo
     # Effects of the call statement
     effects::Effects
 end
-function InliningTodo(mi::MethodInstance, (ir, di)::Tuple{IRCode, DebugInfo}, effects::Effects)
-    return InliningTodo(mi, ir, di, linear_inline_eligible(ir), effects)
+function InliningTodo(mi::MethodInstance, ir::IRCode, spec_info::SpecInfo, di::DebugInfo, effects::Effects)
+    return InliningTodo(mi, ir, spec_info, di, linear_inline_eligible(ir), effects)
 end
 
 struct ConstantCase
@@ -50,12 +52,13 @@ struct InliningCase
 end
 
 struct UnionSplit
-    fully_covered::Bool
+    handled_all_cases::Bool # All possible dispatches are included in the cases
+    fully_covered::Bool # All handled cases are fully covering
     atype::DataType
     cases::Vector{InliningCase}
     bbs::Vector{Int}
-    UnionSplit(fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) =
-        new(fully_covered, atype, cases, Int[])
+    UnionSplit(handled_all_cases::Bool, fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) =
+        new(handled_all_cases, fully_covered, atype, cases, Int[])
 end
 
 struct InliningEdgeTracker
@@ -215,7 +218,7 @@ end
 
 function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit,
                                 state::CFGInliningState, params::OptimizationParams)
-    (; fully_covered, #=atype,=# cases, bbs) = union_split
+    (; handled_all_cases, fully_covered, #=atype,=# cases, bbs) = union_split
     inline_into_block!(state, block_for_inst(ir, idx))
     from_bbs = Int[]
     delete!(state.split_targets, length(state.new_cfg_blocks))
@@ -235,7 +238,7 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit,
             end
         end
         push!(from_bbs, length(state.new_cfg_blocks))
-        if !(i == length(cases) && fully_covered)
+        if !(i == length(cases) && (handled_all_cases && fully_covered))
             # This block will have the next condition or the final else case
             push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx)))
             push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks))
@@ -244,7 +247,10 @@ function cfg_inline_unionsplit!(ir::IRCode, idx::Int, union_split::UnionSplit,
         end
     end
     # The edge from the fallback block.
-    fully_covered || push!(from_bbs, length(state.new_cfg_blocks))
+    # NOTE This edge is only required for `!handled_all_cases` and not `!fully_covered`,
+    #      since in the latter case we inline `Core.throw_methoderror` into the fallback
+    #      block, which is must-throw, making the subsequent code path unreachable.
+    !handled_all_cases && push!(from_bbs, length(state.new_cfg_blocks))
     # This block will be the block everyone returns to
     push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx), from_bbs, orig_succs))
     join_bb = length(state.new_cfg_blocks)
@@ -317,7 +323,8 @@ function ir_inline_linetable!(debuginfo::DebugInfoStream, inlinee_debuginfo::Deb
 end
 
 function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCode, IncrementalCompact},
-                              ir::IRCode, di::DebugInfo, mi::MethodInstance, inlined_at::NTuple{3,Int32}, argexprs::Vector{Any})
+                              ir::IRCode, spec_info::SpecInfo, di::DebugInfo, mi::MethodInstance,
+                              inlined_at::NTuple{3,Int32}, argexprs::Vector{Any})
     def = mi.def::Method
     debuginfo = inline_target isa IRCode ? inline_target.debuginfo : inline_target.ir.debuginfo
     topline = new_inlined_at = ir_inline_linetable!(debuginfo, di, inlined_at)
@@ -330,8 +337,8 @@ function ir_prepare_inlining!(insert_node!::Inserter, inline_target::Union{IRCod
         spvals_ssa = insert_node!(
             removable_if_unused(NewInstruction(Expr(:call, Core._compute_sparams, def, argexprs...), SimpleVector, topline)))
     end
-    if def.isva
-        nargs_def = Int(def.nargs::Int32)
+    if spec_info.isva
+        nargs_def = spec_info.nargs
         if nargs_def > 0
             argexprs = fix_va_argexprs!(insert_node!, inline_target, argexprs, nargs_def, topline)
         end
@@ -358,7 +365,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                          item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
     # Ok, do the inlining here
     inlined_at = compact.result[idx][:line]
-    ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.di, item.mi, inlined_at, argexprs)
+    ssa_substitute = ir_prepare_inlining!(InsertHere(compact), compact, item.ir, item.spec_info, item.di, item.mi, inlined_at, argexprs)
     boundscheck = has_flag(compact.result[idx], IR_FLAG_INBOUNDS) ? :off : boundscheck
 
     # If the iterator already moved on to the next basic block,
@@ -523,7 +530,7 @@ assuming their order stays the same post-discovery in `ml_matches`.
 function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
                                union_split::UnionSplit, boundscheck::Symbol,
                                todo_bbs::Vector{Tuple{Int,Int}}, interp::AbstractInterpreter)
-    (; fully_covered, atype, cases, bbs) = union_split
+    (; handled_all_cases, fully_covered, atype, cases, bbs) = union_split
     stmt, typ, line = compact.result[idx][:stmt], compact.result[idx][:type], compact.result[idx][:line]
     join_bb = bbs[end]
     pn = PhiNode()
@@ -538,7 +545,7 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::
         cond = true
         nparams = fieldcount(atype)
         @assert nparams == fieldcount(mtype)
-        if !(i == ncases && fully_covered)
+        if !(i == ncases && fully_covered && handled_all_cases)
             for i = 1:nparams
                 aft, mft = fieldtype(atype, i), fieldtype(mtype, i)
                 # If this is always true, we don't need to check for it
@@ -597,14 +604,18 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::
     end
     bb += 1
     # We're now in the fall through block, decide what to do
-    if !fully_covered
+    if !handled_all_cases
         ssa = insert_node_here!(compact, NewInstruction(stmt, typ, line))
         push!(pn.edges, bb)
         push!(pn.values, ssa)
         insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Any, line))
         finish_current_bb!(compact, 0)
+    elseif !fully_covered
+        insert_node_here!(compact, NewInstruction(Expr(:call, GlobalRef(Core, :throw_methoderror), argexprs...), Union{}, line))
+        insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line))
+        finish_current_bb!(compact, 0)
+        ncases == 0 && return insert_node_here!(compact, NewInstruction(nothing, Any, line))
     end
-
     # We're now in the join block.
     return insert_node_here!(compact, NewInstruction(pn, typ, line))
 end
@@ -852,15 +863,14 @@ function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult,
     if inferred_result isa ConstantCase
         add_inlining_backedge!(et, mi)
         return inferred_result
-    end
-    if inferred_result isa InferredResult
+    elseif inferred_result isa InferredResult
         (; src, effects) = inferred_result
     elseif inferred_result isa CodeInstance
         src = @atomic :monotonic inferred_result.inferred
         effects = decode_effects(inferred_result.ipo_purity_bits)
-    else
-        src = nothing
-        effects = Effects()
+    else # there is no cached source available, bail out
+        return compileable_specialization(mi, Effects(), et, info;
+            compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
     end
 
     # the duplicated check might have been done already within `analyze_method!`, but still
@@ -875,9 +885,12 @@ function resolve_todo(mi::MethodInstance, result::Union{Nothing,InferenceResult,
             compilesig_invokes=OptimizationParams(state.interp).compilesig_invokes)
 
     add_inlining_backedge!(et, mi)
-    ir = inferred_result isa CodeInstance  ? retrieve_ir_for_inlining(inferred_result, src) :
-                                             retrieve_ir_for_inlining(mi, src, preserve_local_sources)
-    return InliningTodo(mi, ir, effects)
+    if inferred_result isa CodeInstance
+        ir, spec_info, debuginfo = retrieve_ir_for_inlining(inferred_result, src)
+    else
+        ir, spec_info, debuginfo = retrieve_ir_for_inlining(mi, src, preserve_local_sources)
+    end
+    return InliningTodo(mi, ir, spec_info, debuginfo, effects)
 end
 
 # the special resolver for :invoke-d call
@@ -893,23 +906,17 @@ function resolve_todo(mi::MethodInstance, @nospecialize(info::CallInfo), flag::U
     if cached_result isa ConstantCase
         add_inlining_backedge!(et, mi)
         return cached_result
-    end
-    if cached_result isa InferredResult
-        (; src, effects) = cached_result
     elseif cached_result isa CodeInstance
         src = @atomic :monotonic cached_result.inferred
         effects = decode_effects(cached_result.ipo_purity_bits)
-    else
-        src = nothing
-        effects = Effects()
+    else # there is no cached source available, bail out
+        return nothing
     end
 
-    preserve_local_sources = true
     src_inlining_policy(state.interp, src, info, flag) || return nothing
-    ir = cached_result isa CodeInstance  ? retrieve_ir_for_inlining(cached_result, src) :
-                                           retrieve_ir_for_inlining(mi, src, preserve_local_sources)
+    ir, spec_info, debuginfo = retrieve_ir_for_inlining(cached_result, src)
     add_inlining_backedge!(et, mi)
-    return InliningTodo(mi, ir, effects)
+    return InliningTodo(mi, ir, spec_info, debuginfo, effects)
 end
 
 function validate_sparams(sparams::SimpleVector)
@@ -963,22 +970,29 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
     return resolve_todo(mi, volatile_inf_result, info, flag, state; invokesig)
 end
 
-function retrieve_ir_for_inlining(cached_result::CodeInstance, src::MaybeCompressed)
-    src = _uncompressed_ir(cached_result, src)::CodeInfo
-    return inflate_ir!(src, cached_result.def), src.debuginfo
+function retrieve_ir_for_inlining(cached_result::CodeInstance, src::String)
+    src = _uncompressed_ir(cached_result, src)
+    return inflate_ir!(src, cached_result.def), SpecInfo(src), src.debuginfo
+end
+function retrieve_ir_for_inlining(cached_result::CodeInstance, src::CodeInfo)
+    return inflate_ir!(copy(src), cached_result.def), SpecInfo(src), src.debuginfo
 end
 function retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo, preserve_local_sources::Bool)
     if preserve_local_sources
         src = copy(src)
     end
-    return inflate_ir!(src, mi), src.debuginfo
+    return inflate_ir!(src, mi), SpecInfo(src), src.debuginfo
 end
 function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local_sources::Bool)
     if preserve_local_sources
         ir = copy(ir)
     end
+    # COMBAK this is not correct, we should make `InferenceResult` propagate `SpecInfo`
+    spec_info = let m = mi.def::Method
+        SpecInfo(Int(m.nargs), m.isva, false, nothing)
+    end
     ir.debuginfo.def = mi
-    return ir, DebugInfo(ir.debuginfo, length(ir.stmts))
+    return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts))
 end
 
 function handle_single_case!(todo::Vector{Pair{Int,Any}},
@@ -1348,10 +1362,6 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig
             # Too many applicable methods
             # Or there is a (partial?) ambiguity
             return nothing
-        elseif length(meth) == 0
-            # No applicable methods; try next union split
-            handled_all_cases = false
-            continue
         end
         local split_fully_covered = false
         for (j, match) in enumerate(meth)
@@ -1392,12 +1402,16 @@ function compute_inlining_cases(@nospecialize(info::CallInfo), flag::UInt32, sig
             handled_all_cases &= handle_any_const_result!(cases,
                 result, match, argtypes, info, flag, state; allow_typevars=true)
         end
+        if !fully_covered
+            atype = argtypes_to_type(sig.argtypes)
+            # We will emit an inline MethodError so we need a backedge to the MethodTable
+            add_uncovered_edges!(state.edges, info, atype)
+        end
     elseif !isempty(cases)
         # if we've not seen all candidates, union split is valid only for dispatch tuples
         filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
     end
-
-    return cases, (handled_all_cases & fully_covered), joint_effects
+    return cases, handled_all_cases, fully_covered, joint_effects
 end
 
 function handle_call!(todo::Vector{Pair{Int,Any}},
@@ -1405,9 +1419,9 @@ function handle_call!(todo::Vector{Pair{Int,Any}},
     state::InliningState)
     cases = compute_inlining_cases(info, flag, sig, state)
     cases === nothing && return nothing
-    cases, all_covered, joint_effects = cases
+    cases, handled_all_cases, fully_covered, joint_effects = cases
     atype = argtypes_to_type(sig.argtypes)
-    handle_cases!(todo, ir, idx, stmt, atype, cases, all_covered, joint_effects)
+    handle_cases!(todo, ir, idx, stmt, atype, cases, handled_all_cases, fully_covered, joint_effects)
 end
 
 function handle_match!(cases::Vector{InliningCase},
@@ -1458,8 +1472,8 @@ function semiconcrete_result_item(result::SemiConcreteResult,
 
     add_inlining_backedge!(et, mi)
     preserve_local_sources = OptimizationParams(state.interp).preserve_local_sources
-    ir = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)
-    return InliningTodo(mi, ir, result.effects)
+    ir, _, debuginfo = retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)
+    return InliningTodo(mi, ir, result.spec_info, debuginfo, result.effects)
 end
 
 function handle_semi_concrete_result!(cases::Vector{InliningCase}, result::SemiConcreteResult,
@@ -1496,19 +1510,19 @@ function concrete_result_item(result::ConcreteResult, @nospecialize(info::CallIn
 end
 
 function handle_cases!(todo::Vector{Pair{Int,Any}}, ir::IRCode, idx::Int, stmt::Expr,
-    @nospecialize(atype), cases::Vector{InliningCase}, all_covered::Bool,
+    @nospecialize(atype), cases::Vector{InliningCase}, handled_all_cases::Bool, fully_covered::Bool,
     joint_effects::Effects)
     # If we only have one case and that case is fully covered, we may either
     # be able to do the inlining now (for constant cases), or push it directly
     # onto the todo list
-    if all_covered && length(cases) == 1
+    if fully_covered && handled_all_cases && length(cases) == 1
         handle_single_case!(todo, ir, idx, stmt, cases[1].item)
-    elseif length(cases) > 0
+    elseif length(cases) > 0 || handled_all_cases
         isa(atype, DataType) || return nothing
         for case in cases
             isa(case.sig, DataType) || return nothing
         end
-        push!(todo, idx=>UnionSplit(all_covered, atype, cases))
+        push!(todo, idx=>UnionSplit(handled_all_cases, fully_covered, atype, cases))
     else
         add_flag!(ir[SSAValue(idx)], flags_for_effects(joint_effects))
     end
@@ -1589,7 +1603,6 @@ function handle_finalizer_call!(ir::IRCode, idx::Int, stmt::Expr, info::Finalize
             push!(stmt.args, item1.invoke)
         elseif isa(item1, ConstantCase)
             push!(stmt.args, nothing)
-            push!(stmt.args, item1.val)
         end
     end
     return nothing
@@ -1728,7 +1741,7 @@ function late_inline_special_case!(ir::IRCode, idx::Int, stmt::Expr, flag::UInt3
                                    @nospecialize(type), sig::Signature, state::InliningState)
     OptimizationParams(state.interp).inlining || return nothing
     (; f, ft, argtypes) = sig
-    if length(argtypes) == 3 && istopfunction(f, :!==)
+    if length(argtypes) == 3 && f === Core.:(!==)
         # special-case inliner for !== that precedes _methods_by_ftype union splitting
         # and that works, even though inference generally avoids inferring the `!==` Method
         if isa(type, Const)
@@ -1738,7 +1751,7 @@ function late_inline_special_case!(ir::IRCode, idx::Int, stmt::Expr, flag::UInt3
         cmp_call_ssa = insert_node!(ir, idx, removable_if_unused(NewInstruction(cmp_call, Bool)))
         not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa)
         return SomeCase(not_call)
-    elseif length(argtypes) == 3 && istopfunction(f, :(>:))
+    elseif length(argtypes) == 3 && f === Core.:(>:)
         # special-case inliner for issupertype
         # that works, even though inference generally avoids inferring the `>:` Method
         if isa(type, Const) && has_flag(flag, IR_FLAG_NOTHROW)
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index c665c5bef299e..fdcb4621c5c0f 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -2,7 +2,8 @@
 
 Core.PhiNode() = Core.PhiNode(Int32[], Any[])
 
-isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) || isa(stmt, EnterNode) || isexpr(stmt, :leave)
+isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) ||
+    isa(stmt, ReturnNode) || isa(stmt, EnterNode) || isexpr(stmt, :leave)
 
 struct CFG
     blocks::Vector{BasicBlock}
@@ -1431,6 +1432,7 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
     elseif isa(stmt, OldSSAValue)
         ssa_rename[idx] = ssa_rename[stmt.id]
     elseif isa(stmt, GotoNode) && cfg_transforms_enabled
+        stmt.label < 0 && (println(stmt); println(compact))
         label = bb_rename_succ[stmt.label]
         @assert label > 0
         ssa_rename[idx] = SSAValue(result_idx)
diff --git a/base/compiler/ssair/irinterp.jl b/base/compiler/ssair/irinterp.jl
index 83881354e494e..ca8ca770df413 100644
--- a/base/compiler/ssair/irinterp.jl
+++ b/base/compiler/ssair/irinterp.jl
@@ -24,7 +24,7 @@ function concrete_eval_invoke(interp::AbstractInterpreter, ci::CodeInstance, arg
         end
         newirsv = IRInterpretationState(interp, ci, mi, argtypes, world)
         if newirsv !== nothing
-            newirsv.parent = parent
+            assign_parentchild!(newirsv, parent)
             return ir_abstract_constant_propagation(interp, newirsv)
         end
         return Pair{Any,Tuple{Bool,Bool}}(nothing, (is_nothrow(effects), is_noub(effects)))
@@ -51,8 +51,11 @@ end
 
 function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo, irsv::IRInterpretationState)
     si = StmtInfo(true) # TODO better job here?
-    call = abstract_call(interp, arginfo, si, irsv)
-    irsv.ir.stmts[irsv.curridx][:info] = call.info
+    call = abstract_call(interp, arginfo, si, irsv)::Future
+    Future{Nothing}(call, interp, irsv) do call, interp, irsv
+        irsv.ir.stmts[irsv.curridx][:info] = call.info
+        nothing
+    end
     return call
 end
 
@@ -141,8 +144,21 @@ function reprocess_instruction!(interp::AbstractInterpreter, inst::Instruction,
     rt = nothing
     if isa(stmt, Expr)
         head = stmt.head
-        if head === :call || head === :foreigncall || head === :new || head === :splatnew || head === :static_parameter || head === :isdefined || head === :boundscheck
-            (; rt, effects) = abstract_eval_statement_expr(interp, stmt, nothing, irsv)
+        if (head === :call || head === :foreigncall || head === :new || head === :splatnew ||
+            head === :static_parameter || head === :isdefined || head === :boundscheck)
+            @assert isempty(irsv.tasks) # TODO: this whole function needs to be converted to a stackless design to be a valid AbsIntState, but this should work here for now
+            result = abstract_eval_statement_expr(interp, stmt, nothing, irsv)
+            reverse!(irsv.tasks)
+            while true
+                if length(irsv.callstack) > irsv.frameid
+                    typeinf(interp, irsv.callstack[irsv.frameid + 1])
+                elseif !doworkloop(interp, irsv)
+                    break
+                end
+            end
+            @assert length(irsv.callstack) == irsv.frameid && isempty(irsv.tasks)
+            result isa Future && (result = result[])
+            (; rt, effects) = result
             add_flag!(inst, flags_for_effects(effects))
         elseif head === :invoke
             rt, (nothrow, noub) = abstract_eval_invoke_inst(interp, inst, irsv)
@@ -292,7 +308,7 @@ function is_all_const_call(@nospecialize(stmt), interp::AbstractInterpreter, irs
     return true
 end
 
-function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
+function ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState;
         externally_refined::Union{Nothing,BitSet} = nothing)
     (; ir, tpdum, ssa_refined) = irsv
 
@@ -440,20 +456,11 @@ function _ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IR
         store_backedges(frame_instance(irsv), irsv.edges)
     end
 
-    return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub))
-end
-
-function ir_abstract_constant_propagation(interp::NativeInterpreter, irsv::IRInterpretationState)
-    if __measure_typeinf__[]
-        inf_frame = Timings.InferenceFrameInfo(irsv.mi, irsv.world, VarState[], Any[], length(irsv.ir.argtypes))
-        Timings.enter_new_timer(inf_frame)
-        ret = _ir_abstract_constant_propagation(interp, irsv)
-        append!(inf_frame.slottypes, irsv.ir.argtypes)
-        Timings.exit_current_timer(inf_frame)
-        return ret
-    else
-        return _ir_abstract_constant_propagation(interp, irsv)
+    if irsv.frameid != 0
+        callstack = irsv.callstack::Vector{AbsIntState}
+        @assert callstack[end] === irsv && length(callstack) == irsv.frameid
+        pop!(callstack)
     end
+
+    return Pair{Any,Tuple{Bool,Bool}}(maybe_singleton_const(ultimate_rt), (nothrow, noub))
 end
-ir_abstract_constant_propagation(interp::AbstractInterpreter, irsv::IRInterpretationState) =
-    _ir_abstract_constant_propagation(interp, irsv)
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 6b29c9b2fe949..0e2272524a0ed 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -79,7 +79,7 @@ end
 function find_curblock(domtree::DomTree, allblocks::BitSet, curblock::Int)
     # TODO: This can be much faster by looking at current level and only
     # searching for those blocks in a sorted order
-    while !(curblock in allblocks) && curblock !== 0
+    while curblock ∉ allblocks && curblock ≠ 0
         curblock = domtree.idoms_bb[curblock]
     end
     return curblock
@@ -190,18 +190,21 @@ function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospec
     return walk_to_defs(compact, val, typeconstraint, predecessors, 𝕃ₒ)
 end
 
-function trivial_walker(@nospecialize(pi), @nospecialize(idx))
-    return nothing
-end
+abstract type WalkerCallback end
+
+struct TrivialWalker <: WalkerCallback end
+(::TrivialWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue)) = nothing
 
-function pi_walker(@nospecialize(pi), @nospecialize(idx))
-    if isa(pi, PiNode)
-        return LiftedValue(pi.val)
+struct PiWalker <: WalkerCallback end
+function (::PiWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if isa(def, PiNode)
+        return LiftedValue(def.val)
     end
     return nothing
 end
 
-function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), callback=trivial_walker)
+function simple_walk(compact::IncrementalCompact, @nospecialize(defssa::AnySSAValue),
+                     walker_callback::WalkerCallback=TrivialWalker())
     while true
         if isa(defssa, OldSSAValue)
             if already_inserted(compact, defssa)
@@ -218,7 +221,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
         end
         def = compact[defssa][:stmt]
         if isa(def, AnySSAValue)
-            callback(def, defssa)
+            walker_callback(def, defssa)
             if isa(def, SSAValue)
                 is_old(compact, defssa) && (def = OldSSAValue(def.id))
             end
@@ -226,7 +229,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
         elseif isa(def, Union{PhiNode, PhiCNode, GlobalRef})
             return defssa
         else
-            new_def = callback(def, defssa)
+            new_def = walker_callback(def, defssa)
             if new_def === nothing
                 return defssa
             end
@@ -241,16 +244,21 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
     end
 end
 
-function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
-                                @nospecialize(typeconstraint))
-    callback = function (@nospecialize(pi), @nospecialize(idx))
-        if isa(pi, PiNode)
-            typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ))
-            return LiftedValue(pi.val)
-        end
-        return nothing
+mutable struct TypeConstrainingWalker <: WalkerCallback
+    typeconstraint::Any
+    TypeConstrainingWalker(@nospecialize(typeconstraint::Any)) = new(typeconstraint)
+end
+function (walker_callback::TypeConstrainingWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if isa(def, PiNode)
+        walker_callback.typeconstraint =
+            typeintersect(walker_callback.typeconstraint, widenconst(def.typ))
+        return LiftedValue(def.val)
     end
-    def = simple_walk(compact, defssa, callback)
+    return nothing
+end
+function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(val::AnySSAValue),
+                                @nospecialize(typeconstraint))
+    def = simple_walk(compact, val, TypeConstrainingWalker(typeconstraint))
     return Pair{Any, Any}(def, typeconstraint)
 end
 
@@ -638,15 +646,17 @@ end
 
 struct SkipToken end; const SKIP_TOKEN = SkipToken()
 
-function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=::AnySSAValue=#), @nospecialize(old_value),
-                      lifted_philikes::Vector{LiftedPhilike}, lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int},
-                      walker_callback)
+function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa::AnySSAValue),
+                      @nospecialize(old_value), lifted_philikes::Vector{LiftedPhilike},
+                      lifted_leaves::Union{LiftedLeaves, LiftedDefs},
+                      reverse_mapping::IdDict{AnySSAValue, Int},
+                      walker_callback::WalkerCallback)
     val = old_value
     if is_old(compact, old_node_ssa) && isa(val, SSAValue)
         val = OldSSAValue(val.id)
     end
     if isa(val, AnySSAValue)
-        val = simple_walk(compact, val, def_walker(lifted_leaves, reverse_mapping, walker_callback))
+        val = simple_walk(compact, val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback))
     end
     if val in keys(lifted_leaves)
         lifted_val = lifted_leaves[val]
@@ -656,7 +666,7 @@ function lifted_value(compact::IncrementalCompact, @nospecialize(old_node_ssa#=:
         lifted_val === nothing && return UNDEF_TOKEN
         val = lifted_val.val
         if isa(val, AnySSAValue)
-            val = simple_walk(compact, val, pi_walker)
+            val = simple_walk(compact, val, PiWalker())
         end
         return val
     elseif isa(val, AnySSAValue) && val in keys(reverse_mapping)
@@ -673,7 +683,7 @@ function is_old(compact, @nospecialize(old_node_ssa))
     return true
 end
 
-struct PhiNest{C}
+struct PhiNest{C<:WalkerCallback}
     visited_philikes::Vector{AnySSAValue}
     lifted_philikes::Vector{LiftedPhilike}
     lifted_leaves::Union{LiftedLeaves, LiftedDefs}
@@ -743,20 +753,29 @@ function finish_phi_nest!(compact::IncrementalCompact, nest::PhiNest)
     end
 end
 
-function def_walker(lifted_leaves::Union{LiftedLeaves, LiftedDefs}, reverse_mapping::IdDict{AnySSAValue, Int}, walker_callback)
-    function (@nospecialize(walk_def), @nospecialize(defssa))
-        if (defssa in keys(lifted_leaves)) || (isa(defssa, AnySSAValue) && defssa in keys(reverse_mapping))
-            return nothing
-        end
-        isa(walk_def, PiNode) && return LiftedValue(walk_def.val)
-        return walker_callback(walk_def, defssa)
+struct LiftedLeaveWalker{C<:WalkerCallback} <: WalkerCallback
+    lifted_leaves::Union{LiftedLeaves, LiftedDefs}
+    reverse_mapping::IdDict{AnySSAValue, Int}
+    inner_walker_callback::C
+    function LiftedLeaveWalker(@nospecialize(lifted_leaves::Union{LiftedLeaves, LiftedDefs}),
+                               @nospecialize(reverse_mapping::IdDict{AnySSAValue, Int}),
+                               inner_walker_callback::C) where C<:WalkerCallback
+        return new{C}(lifted_leaves, reverse_mapping, inner_walker_callback)
+    end
+end
+function (walker_callback::LiftedLeaveWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    (; lifted_leaves, reverse_mapping, inner_walker_callback) = walker_callback
+    if defssa in keys(lifted_leaves) || defssa in keys(reverse_mapping)
+        return nothing
     end
+    isa(def, PiNode) && return LiftedValue(def.val)
+    return inner_walker_callback(def, defssa)
 end
 
 function perform_lifting!(compact::IncrementalCompact,
         visited_philikes::Vector{AnySSAValue}, @nospecialize(cache_key),
         @nospecialize(result_t), lifted_leaves::Union{LiftedLeaves, LiftedDefs}, @nospecialize(stmt_val),
-        lazydomtree::Union{LazyDomtree,Nothing}, walker_callback = trivial_walker)
+        lazydomtree::Union{LazyDomtree,Nothing}, walker_callback::WalkerCallback = TrivialWalker())
     reverse_mapping = IdDict{AnySSAValue, Int}()
     for id in 1:length(visited_philikes)
         reverse_mapping[visited_philikes[id]] = id
@@ -839,7 +858,7 @@ function perform_lifting!(compact::IncrementalCompact,
 
     # Fixup the stmt itself
     if isa(stmt_val, Union{SSAValue, OldSSAValue})
-        stmt_val = simple_walk(compact, stmt_val, def_walker(lifted_leaves, reverse_mapping, walker_callback))
+        stmt_val = simple_walk(compact, stmt_val, LiftedLeaveWalker(lifted_leaves, reverse_mapping, walker_callback))
     end
 
     if stmt_val in keys(lifted_leaves)
@@ -948,6 +967,17 @@ function keyvalue_predecessors(@nospecialize(key), 𝕃ₒ::AbstractLattice)
     end
 end
 
+struct KeyValueWalker <: WalkerCallback
+    compact::IncrementalCompact
+end
+function (walker_callback::KeyValueWalker)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, walker_callback.compact)
+        @assert length(def.args) in (5, 6)
+        return LiftedValue(def.args[end-2])
+    end
+    return nothing
+end
+
 function lift_keyvalue_get!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
     collection = stmt.args[end-1]
     key = stmt.args[end]
@@ -964,16 +994,9 @@ function lift_keyvalue_get!(compact::IncrementalCompact, idx::Int, stmt::Expr, 
         result_t = tmerge(𝕃ₒ, result_t, argextype(v.val, compact))
     end
 
-    function keyvalue_walker(@nospecialize(def), _)
-        if is_known_invoke_or_call(def, Core.OptimizedGenerics.KeyValue.set, compact)
-            @assert length(def.args) in (5, 6)
-            return LiftedValue(def.args[end-2])
-        end
-        return nothing
-    end
     (lifted_val, nest) = perform_lifting!(compact,
         visited_philikes, key, result_t, lifted_leaves, collection, nothing,
-        keyvalue_walker)
+        KeyValueWalker(compact))
 
     compact[idx] = lifted_val === nothing ? nothing : Expr(:call, GlobalRef(Core, :tuple), lifted_val.val)
     finish_phi_nest!(compact, nest)
@@ -1139,12 +1162,15 @@ end
 # which can be very large sometimes, and program counters in question are often very sparse
 const SPCSet = IdSet{Int}
 
-struct IntermediaryCollector
+struct IntermediaryCollector <: WalkerCallback
     intermediaries::SPCSet
 end
-function (this::IntermediaryCollector)(@nospecialize(pi), @nospecialize(ssa))
-    if !isa(pi, Expr)
-        push!(this.intermediaries, ssa.id)
+function (walker_callback::IntermediaryCollector)(@nospecialize(def), @nospecialize(defssa::AnySSAValue))
+    if !(def isa Expr)
+        push!(walker_callback.intermediaries, defssa.id)
+        if def isa PiNode
+            return LiftedValue(def.val)
+        end
     end
     return nothing
 end
@@ -1242,7 +1268,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
             update_scope_mapping!(scope_mapping, bb+1, bbs)
         end
         # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement)
-        is_setfield = is_isdefined = is_finalizer = is_keyvalue_get = false
+        is_setfield = is_isdefined = is_finalizer = false
         field_ordering = :unspecified
         if is_known_call(stmt, setfield!, compact)
             4 <= length(stmt.args) <= 5 || continue
@@ -1371,8 +1397,7 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
         if ismutabletypename(struct_typ_name)
             isa(val, SSAValue) || continue
             let intermediaries = SPCSet()
-                callback = IntermediaryCollector(intermediaries)
-                def = simple_walk(compact, val, callback)
+                def = simple_walk(compact, val, IntermediaryCollector(intermediaries))
                 # Mutable stuff here
                 isa(def, SSAValue) || continue
                 if defuses === nothing
@@ -1507,7 +1532,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
     end
 
     src_inlining_policy(inlining.interp, src, info, IR_FLAG_NULL) || return false
-    src, di = retrieve_ir_for_inlining(code, src)
+    src, spec_info, di = retrieve_ir_for_inlining(code, src)
 
     # For now: Require finalizer to only have one basic block
     length(src.cfg.blocks) == 1 || return false
@@ -1517,7 +1542,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int,
 
     # TODO: Should there be a special line number node for inlined finalizers?
     inline_at = ir[SSAValue(idx)][:line]
-    ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, di, mi, inline_at, argexprs)
+    ssa_substitute = ir_prepare_inlining!(InsertBefore(ir, SSAValue(idx)), ir, src, spec_info, di, mi, inline_at, argexprs)
 
     # TODO: Use the actual inliner here rather than open coding this special purpose inliner.
     ssa_rename = Vector{Any}(undef, length(src.stmts))
@@ -1539,10 +1564,12 @@ end
 
 is_nothrow(ir::IRCode, ssa::SSAValue) = has_flag(ir[ssa], IR_FLAG_NOTHROW)
 
-function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = nothing)
+function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Int)
     worklist = Int[from_bb]
     visited = BitSet(from_bb)
-    if to_bb !== nothing
+    if to_bb == from_bb
+        return visited
+    else
         push!(visited, to_bb)
     end
     function visit!(bb::Int)
@@ -1557,100 +1584,78 @@ function reachable_blocks(cfg::CFG, from_bb::Int, to_bb::Union{Nothing,Int} = no
     return visited
 end
 
-function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse,
+function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int, defuse::SSADefUse,
         inlining::InliningState, lazydomtree::LazyDomtree,
         lazypostdomtree::LazyPostDomtree, @nospecialize(info::CallInfo))
     # For now, require that:
     # 1. The allocation dominates the finalizer registration
-    # 2. The finalizer registration dominates all uses reachable from the
-    #    finalizer registration.
-    # 3. The insertion block for the finalizer is the post-dominator of all
-    #    uses and the finalizer registration block. The insertion block must
-    #    be dominated by the finalizer registration block.
-    # 4. The path from the finalizer registration to the finalizer inlining
+    # 2. The insertion block for the finalizer is the post-dominator of all
+    #    uses (including the finalizer registration).
+    # 3. The path from the finalizer registration to the finalizer inlining
     #    location is nothrow
     #
-    # TODO: We could relax item 3, by inlining the finalizer multiple times.
+    # TODO: We could relax the check 2, by inlining the finalizer multiple times.
 
     # Check #1: The allocation dominates the finalizer registration
     domtree = get!(lazydomtree)
     finalizer_bb = block_for_inst(ir, finalizer_idx)
-    alloc_bb = block_for_inst(ir, idx)
+    alloc_bb = block_for_inst(ir, alloc_idx)
     dominates(domtree, alloc_bb, finalizer_bb) || return nothing
 
-    bb_insert_block::Int = finalizer_bb
-    bb_insert_idx::Union{Int,Nothing} = finalizer_idx
-    function note_block_use!(usebb::Int, useidx::Int)
-        new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree),
-            bb_insert_block, usebb)
-        if new_bb_insert_block == bb_insert_block && bb_insert_idx !== nothing
-            bb_insert_idx = max(bb_insert_idx::Int, useidx)
-        elseif new_bb_insert_block == usebb
-            bb_insert_idx = useidx
+    # Check #2: The insertion block for the finalizer is the post-dominator of all uses
+    insert_bb::Int = finalizer_bb
+    insert_idx::Union{Int,Nothing} = finalizer_idx
+    function note_defuse!(x::Union{Int,SSAUse})
+        defuse_idx = x isa SSAUse ? x.idx : x
+        defuse_idx == finalizer_idx && return nothing
+        defuse_bb = block_for_inst(ir, defuse_idx)
+        new_insert_bb = nearest_common_dominator(get!(lazypostdomtree),
+            insert_bb, defuse_bb)
+        if new_insert_bb == insert_bb && insert_idx !== nothing
+            insert_idx = max(insert_idx::Int, defuse_idx)
+        elseif new_insert_bb == defuse_bb
+            insert_idx = defuse_idx
         else
-            bb_insert_idx = nothing
+            insert_idx = nothing
         end
-        bb_insert_block = new_bb_insert_block
+        insert_bb = new_insert_bb
         nothing
     end
-
-    # Collect all reachable blocks between the finalizer registration and the
-    # insertion point
-    blocks = reachable_blocks(ir.cfg, finalizer_bb, alloc_bb)
-
-    # Check #2
-    function check_defuse(x::Union{Int,SSAUse})
-        duidx = x isa SSAUse ? x.idx : x
-        duidx == finalizer_idx && return true
-        bb = block_for_inst(ir, duidx)
-        # Not reachable from finalizer registration - we're ok
-        bb ∉ blocks && return true
-        note_block_use!(bb, duidx)
-        if dominates(domtree, finalizer_bb, bb)
-            return true
-        else
-            return false
-        end
-    end
-    all(check_defuse, defuse.uses) || return nothing
-    all(check_defuse, defuse.defs) || return nothing
-    bb_insert_block != 0 || return nothing # verify post-dominator of all uses exists
-
-    # Check #3
-    dominates(domtree, finalizer_bb, bb_insert_block) || return nothing
+    foreach(note_defuse!, defuse.uses)
+    foreach(note_defuse!, defuse.defs)
+    insert_bb != 0 || return nothing # verify post-dominator of all uses exists
 
     if !OptimizationParams(inlining.interp).assume_fatal_throw
         # Collect all reachable blocks between the finalizer registration and the
         # insertion point
-        blocks = finalizer_bb == bb_insert_block ? Int[finalizer_bb] :
-            reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block)
+        blocks = reachable_blocks(ir.cfg, finalizer_bb, insert_bb)
 
-        # Check #4
-        function check_range_nothrow(ir::IRCode, s::Int, e::Int)
+        # Check #3
+        function check_range_nothrow(s::Int, e::Int)
             return all(s:e) do sidx::Int
                 sidx == finalizer_idx && return true
-                sidx == idx && return true
+                sidx == alloc_idx && return true
                 return is_nothrow(ir, SSAValue(sidx))
             end
         end
         for bb in blocks
             range = ir.cfg.blocks[bb].stmts
             s, e = first(range), last(range)
-            if bb == bb_insert_block
-                bb_insert_idx === nothing && continue
-                e = bb_insert_idx
+            if bb == insert_bb
+                insert_idx === nothing && continue
+                e = insert_idx
             end
             if bb == finalizer_bb
                 s = finalizer_idx
             end
-            check_range_nothrow(ir, s, e) || return nothing
+            check_range_nothrow(s, e) || return nothing
         end
     end
 
     # Ok, legality check complete. Figure out the exact statement where we're
     # going to inline the finalizer.
-    loc = bb_insert_idx === nothing ? first(ir.cfg.blocks[bb_insert_block].stmts) : bb_insert_idx::Int
-    attach_after = bb_insert_idx !== nothing
+    loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int
+    attach_after = insert_idx !== nothing
 
     finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt]
     argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]]
@@ -1677,49 +1682,49 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse
     return nothing
 end
 
-function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState})
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int,Tuple{SPCSet,SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing,InliningState})
     𝕃ₒ = inlining === nothing ? SimpleInferenceLattice.instance : optimizer_lattice(inlining.interp)
     lazypostdomtree = LazyPostDomtree(ir)
-    for (idx, (intermediaries, defuse)) in defuses
-        intermediaries = collect(intermediaries)
+    for (defidx, (intermediaries, defuse)) in defuses
         # Check if there are any uses we did not account for. If so, the variable
         # escapes and we cannot eliminate the allocation. This works, because we're guaranteed
         # not to include any intermediaries that have dead uses. As a result, missing uses will only ever
         # show up in the nuses_total count.
         nleaves = length(defuse.uses) + length(defuse.defs)
         nuses = 0
-        for idx in intermediaries
-            nuses += used_ssas[idx]
+        for iidx in intermediaries
+            nuses += used_ssas[iidx]
         end
-        nuses_total = used_ssas[idx] + nuses - length(intermediaries)
+        nuses_total = used_ssas[defidx] + nuses - length(intermediaries)
         nleaves == nuses_total || continue
         # Find the type for this allocation
-        defexpr = ir[SSAValue(idx)][:stmt]
+        defexpr = ir[SSAValue(defidx)][:stmt]
         isexpr(defexpr, :new) || continue
-        newidx = idx
-        typ = unwrap_unionall(ir.stmts[newidx][:type])
+        typ = unwrap_unionall(ir.stmts[defidx][:type])
         # Could still end up here if we tried to setfield! on an immutable, which would
         # error at runtime, but is not illegal to have in the IR.
         typ = widenconst(typ)
         ismutabletype(typ) || continue
         typ = typ::DataType
         # First check for any finalizer calls
-        finalizer_idx = nothing
-        for use in defuse.uses
+        finalizer_useidx = nothing
+        for (useidx, use) in enumerate(defuse.uses)
             if use.kind === :finalizer
                 # For now: Only allow one finalizer per allocation
-                finalizer_idx !== nothing && @goto skip
-                finalizer_idx = use.idx
+                finalizer_useidx !== nothing && @goto skip
+                finalizer_useidx = useidx
             end
         end
-        if finalizer_idx !== nothing && inlining !== nothing
-            try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining,
+        all_eliminated = all_forwarded = true
+        if finalizer_useidx !== nothing && inlining !== nothing
+            finalizer_idx = defuse.uses[finalizer_useidx].idx
+            try_resolve_finalizer!(ir, defidx, finalizer_idx, defuse, inlining,
                 lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info])
-            continue
+            deleteat!(defuse.uses, finalizer_useidx)
+            all_eliminated = all_forwarded = false # can't eliminate `setfield!` calls safely
         end
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
-        all_eliminated = all_forwarded = true
         for use in defuse.uses
             if use.kind === :preserve
                 for du in fielddefuse
@@ -1752,11 +1757,11 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
         # but we should come up with semantics for well defined semantics
         # for uninitialized fields first.
         ndefuse = length(fielddefuse)
-        blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# BitSet}}(undef, ndefuse)
+        blocks = Vector{Tuple{#=phiblocks=#Vector{Int},#=allblocks=#BitSet}}(undef, ndefuse)
         for fidx in 1:ndefuse
             du = fielddefuse[fidx]
             isempty(du.uses) && continue
-            push!(du.defs, newidx)
+            push!(du.defs, defidx)
             ldu = compute_live_ins(ir.cfg, du)
             if isempty(ldu.live_in_bbs)
                 phiblocks = Int[]
@@ -1769,7 +1774,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                 for i = 1:length(du.uses)
                     use = du.uses[i]
                     if use.kind === :isdefined
-                        if has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx)
+                        if has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx)
                             ir[SSAValue(use.idx)][:stmt] = true
                         else
                             all_eliminated = false
@@ -1782,7 +1787,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
                             continue
                         end
                     end
-                    has_safe_def(ir, get!(lazydomtree), allblocks, du, newidx, use.idx) || @goto skip
+                    has_safe_def(ir, get!(lazydomtree), allblocks, du, defidx, use.idx) || @goto skip
                 end
             else # always have some definition at the allocation site
                 for i = 1:length(du.uses)
@@ -1849,19 +1854,19 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             # all "usages" (i.e. `getfield` and `isdefined` calls) are eliminated,
             # now eliminate "definitions" (i.e. `setfield!`) calls
             # (NOTE the allocation itself will be eliminated by DCE pass later)
-            for idx in du.defs
-                idx == newidx && continue # this is allocation
+            for didx in du.defs
+                didx == defidx && continue # this is allocation
                 # verify this statement won't throw, otherwise it can't be eliminated safely
-                ssa = SSAValue(idx)
-                if is_nothrow(ir, ssa)
-                    ir[ssa][:stmt] = nothing
+                setfield_ssa = SSAValue(didx)
+                if is_nothrow(ir, setfield_ssa)
+                    ir[setfield_ssa][:stmt] = nothing
                 else
                     # We can't eliminate this statement, because it might still
                     # throw an error, but we can mark it as effect-free since we
                     # know we have removed all uses of the mutable allocation.
                     # As a result, if we ever do prove nothrow, we can delete
                     # this statement then.
-                    add_flag!(ir[ssa], IR_FLAG_EFFECT_FREE)
+                    add_flag!(ir[setfield_ssa], IR_FLAG_EFFECT_FREE)
                 end
             end
         end
@@ -1870,7 +1875,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
             # this means all ccall preserves have been replaced with forwarded loads
             # so we can potentially eliminate the allocation, otherwise we must preserve
             # the whole allocation.
-            push!(intermediaries, newidx)
+            push!(intermediaries, defidx)
         end
         # Insert the new preserves
         for (useidx, new_preserves) in preserve_uses
@@ -1882,7 +1887,7 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse
     end
 end
 
-function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preserves::Vector{Any})
+function form_new_preserves(origex::Expr, intermediaries::Union{Vector{Int},SPCSet}, new_preserves::Vector{Any})
     newex = Expr(:foreigncall)
     nccallargs = length(origex.args[3]::SimpleVector)
     for i in 1:(6+nccallargs-1)
@@ -1891,7 +1896,7 @@ function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preser
     for i in (6+nccallargs):length(origex.args)
         x = origex.args[i]
         # don't need to preserve intermediaries
-        if isa(x, SSAValue) && x.id in intermediates
+        if isa(x, SSAValue) && x.id in intermediaries
             continue
         end
         push!(newex.args, x)
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index 82ca6e364f2fa..7d936a1688aba 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -1050,6 +1050,8 @@ function Base.show(io::IO, e::Effects)
     printstyled(io, effectbits_letter(e, :noub, 'u'); color=effectbits_color(e, :noub))
     print(io, ',')
     printstyled(io, effectbits_letter(e, :nonoverlayed, 'o'); color=effectbits_color(e, :nonoverlayed))
+    print(io, ',')
+    printstyled(io, effectbits_letter(e, :nortcall, 'r'); color=effectbits_color(e, :nortcall))
     print(io, ')')
 end
 
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 756dc98863af5..e70633ffecf6a 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -88,6 +88,9 @@ function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecializ
         insert_node!(ir, idx, NewInstruction(
             Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any))
         return UNDEF_TOKEN
+    elseif has_flag(ir.stmts[idx], IR_FLAG_NOTHROW)
+        # if the `isdefined`-ness of this slot is guaranteed by abstract interpretation,
+        # there is no need to form a `:throw_undef_if_not`
     elseif def_ssa !== true
         insert_node!(ir, idx, NewInstruction(
             Expr(:throw_undef_if_not, ci.slotnames[slot], def_ssa), Any))
@@ -153,12 +156,12 @@ end
 
 function fixup_uses!(ir::IRCode, ci::CodeInfo, code::Vector{Any}, uses::Vector{Int}, slot::Int, @nospecialize(ssa))
     for use in uses
-        code[use] = fixemup!(x::SlotNumber->slot_id(x)==slot, stmt::SlotNumber->(ssa, true), ir, ci, use, code[use])
+        code[use] = fixemup!(x::SlotNumber->slot_id(x)==slot, ::SlotNumber->Pair{Any,Any}(ssa, true), ir, ci, use, code[use])
     end
 end
 
 function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), renames::Vector{Pair{Any, Any}})
-    return fixemup!(stmt::SlotNumber->true, stmt::SlotNumber->renames[slot_id(stmt)], ir, ci, idx, stmt)
+    return fixemup!(::SlotNumber->true, x::SlotNumber->renames[slot_id(x)], ir, ci, idx, stmt)
 end
 
 # maybe use expr_type?
@@ -656,7 +659,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, sv::OptimizationState,
     visited = BitSet()
     new_nodes = ir.new_nodes
     @timeit "SSA Rename" while !isempty(worklist)
-        (item::Int, pred, incoming_vals) = pop!(worklist)
+        (item, pred, incoming_vals) = pop!(worklist)
         if sv.bb_vartables[item] === nothing
             continue
         end
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index a4286177e93a4..268991282c483 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -1,9 +1,11 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 function maybe_show_ir(ir::IRCode)
-    if isdefined(Core, :Main)
+    if isdefined(Core, :Main) && isdefined(Core.Main, :Base)
         # ensure we use I/O that does not yield, as this gets called during compilation
         invokelatest(Core.Main.Base.show, Core.stdout, "text/plain", ir)
+    else
+        Core.show(ir)
     end
 end
 
@@ -25,6 +27,7 @@ is_toplevel_expr_head(head::Symbol) = head === :global || head === :method || he
 is_value_pos_expr_head(head::Symbol) = head === :static_parameter
 function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, printed_use_idx::Int, print::Bool, isforeigncall::Bool, arg_idx::Int, allow_frontend_forms::Bool)
     if isa(op, SSAValue)
+        op.id > 0 || @verify_error "Def ($(op.id)) is invalid in final IR"
         if op.id > length(ir.stmts)
             def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos)
         else
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 25f5bb894eaa9..9dba7a4459f9e 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -13,6 +13,12 @@ struct CallMeta
     exct::Any
     effects::Effects
     info::CallInfo
+    refinements # ::Union{Nothing,SlotRefinement,Vector{Any}}
+    function CallMeta(rt::Any, exct::Any, effects::Effects, info::CallInfo,
+                      refinements=nothing)
+        @nospecialize rt exct info
+        return new(rt, exct, effects, info, refinements)
+    end
 end
 
 struct NoCallInfo <: CallInfo end
@@ -27,10 +33,16 @@ not a call to a generic function.
 """
 struct MethodMatchInfo <: CallInfo
     results::MethodLookupResult
+    mt::MethodTable
+    fullmatch::Bool
 end
 nsplit_impl(info::MethodMatchInfo) = 1
 getsplit_impl(info::MethodMatchInfo, idx::Int) = (@assert idx == 1; info.results)
 getresult_impl(::MethodMatchInfo, ::Int) = nothing
+function add_uncovered_edges_impl(edges::Vector{Any}, info::MethodMatchInfo, @nospecialize(atype))
+    fully_covering(info) || push!(edges, info.mt, atype)
+    nothing
+end
 
 """
     info::UnionSplitInfo <: CallInfo
@@ -42,20 +54,27 @@ each partition (`info.matches::Vector{MethodMatchInfo}`).
 This info is illegal on any statement that is not a call to a generic function.
 """
 struct UnionSplitInfo <: CallInfo
-    matches::Vector{MethodMatchInfo}
+    split::Vector{MethodMatchInfo}
 end
 
 nmatches(info::MethodMatchInfo) = length(info.results)
 function nmatches(info::UnionSplitInfo)
     n = 0
-    for mminfo in info.matches
+    for mminfo in info.split
         n += nmatches(mminfo)
     end
     return n
 end
-nsplit_impl(info::UnionSplitInfo) = length(info.matches)
-getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit_impl(info.matches[idx], 1)
+nsplit_impl(info::UnionSplitInfo) = length(info.split)
+getsplit_impl(info::UnionSplitInfo, idx::Int) = getsplit(info.split[idx], 1)
 getresult_impl(::UnionSplitInfo, ::Int) = nothing
+function add_uncovered_edges_impl(edges::Vector{Any}, info::UnionSplitInfo, @nospecialize(atype))
+    all(fully_covering, info.split) && return nothing
+    # add mt backedges with removing duplications
+    for mt in uncovered_method_tables(info)
+        push!(edges, mt, atype)
+    end
+end
 
 abstract type ConstResult end
 
@@ -75,6 +94,7 @@ struct SemiConcreteResult <: ConstResult
     mi::MethodInstance
     ir::IRCode
     effects::Effects
+    spec_info::SpecInfo
 end
 
 # XXX Technically this does not represent a result of constant inference, but rather that of
@@ -99,6 +119,7 @@ end
 nsplit_impl(info::ConstCallInfo) = nsplit(info.call)
 getsplit_impl(info::ConstCallInfo, idx::Int) = getsplit(info.call, idx)
 getresult_impl(info::ConstCallInfo, idx::Int) = info.results[idx]
+add_uncovered_edges_impl(edges::Vector{Any}, info::ConstCallInfo, @nospecialize(atype)) = add_uncovered_edges!(edges, info.call, atype)
 
 """
     info::MethodResultPure <: CallInfo
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index 28e883d83312c..a6b7e53c6f320 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -89,6 +89,7 @@ function add_tfunc(@nospecialize(f::Builtin), minarg::Int, maxarg::Int, @nospeci
 end
 
 add_tfunc(throw, 1, 1, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0)
+add_tfunc(Core.throw_methoderror, 1, INT_INF, @nospecs((𝕃::AbstractLattice, x)->Bottom), 0)
 
 # the inverse of typeof_tfunc
 # returns (type, isexact, isconcrete, istype)
@@ -134,8 +135,8 @@ function instanceof_tfunc(@nospecialize(t), astag::Bool=false, @nospecialize(tro
         end
         return tr, isexact, isconcrete, istype
     elseif isa(t, Union)
-        ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a, astag, troot)
-        tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b, astag, troot)
+        ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(unwraptv(t.a), astag, troot)
+        tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(unwraptv(t.b), astag, troot)
         isconcrete = isconcrete_a && isconcrete_b
         istype = istype_a && istype_b
         # most users already handle the Union case, so here we assume that
@@ -227,10 +228,19 @@ end
 @nospecs shift_tfunc(𝕃::AbstractLattice, x, y) = shift_tfunc(widenlattice(𝕃), x, y)
 @nospecs shift_tfunc(::JLTypeLattice, x, y) = widenconst(x)
 
+function not_tfunc(𝕃::AbstractLattice, @nospecialize(b))
+    if isa(b, Conditional)
+        return Conditional(b.slot, b.elsetype, b.thentype)
+    elseif isa(b, Const)
+        return Const(not_int(b.val))
+    end
+    return math_tfunc(𝕃, b)
+end
+
 add_tfunc(and_int, 2, 2, and_int_tfunc, 1)
 add_tfunc(or_int, 2, 2, or_int_tfunc, 1)
 add_tfunc(xor_int, 2, 2, math_tfunc, 1)
-add_tfunc(not_int, 1, 1, math_tfunc, 0) # usually used as not_int(::Bool) to negate a condition
+add_tfunc(not_int, 1, 1, not_tfunc, 0) # usually used as not_int(::Bool) to negate a condition
 add_tfunc(shl_int, 2, 2, shift_tfunc, 1)
 add_tfunc(lshr_int, 2, 2, shift_tfunc, 1)
 add_tfunc(ashr_int, 2, 2, shift_tfunc, 1)
@@ -410,7 +420,7 @@ end
             else
                 return Bottom
             end
-            if 1 <= idx <= datatype_min_ninitialized(a1)
+            if 1 ≤ idx ≤ datatype_min_ninitialized(a1)
                 return Const(true)
             elseif a1.name === _NAMEDTUPLE_NAME
                 if isconcretetype(a1)
@@ -418,15 +428,21 @@ end
                 else
                     ns = a1.parameters[1]
                     if isa(ns, Tuple)
-                        return Const(1 <= idx <= length(ns))
+                        return Const(1 ≤ idx ≤ length(ns))
                     end
                 end
-            elseif idx <= 0 || (!isvatuple(a1) && idx > fieldcount(a1))
+            elseif idx ≤ 0 || (!isvatuple(a1) && idx > fieldcount(a1))
                 return Const(false)
             elseif isa(arg1, Const)
                 if !ismutabletype(a1) || isconst(a1, idx)
                     return Const(isdefined(arg1.val, idx))
                 end
+            elseif isa(arg1, PartialStruct)
+                if !isvarargtype(arg1.fields[end])
+                    if 1 ≤ idx ≤ length(arg1.fields)
+                        return Const(true)
+                    end
+                end
             elseif !isvatuple(a1)
                 fieldT = fieldtype(a1, idx)
                 if isa(fieldT, DataType) && isbitstype(fieldT)
@@ -547,9 +563,9 @@ add_tfunc(Core.sizeof, 1, 1, sizeof_tfunc, 1)
         end
     end
     if isa(x, Union)
-        na = nfields_tfunc(𝕃, x.a)
+        na = nfields_tfunc(𝕃, unwraptv(x.a))
         na === Int && return Int
-        return tmerge(na, nfields_tfunc(𝕃, x.b))
+        return tmerge(𝕃, na, nfields_tfunc(𝕃, unwraptv(x.b)))
     end
     return Int
 end
@@ -980,27 +996,39 @@ end
     ⊑ = partialorder(𝕃)
 
     # If we have s00 being a const, we can potentially refine our type-based analysis above
-    if isa(s00, Const) || isconstType(s00)
-        if !isa(s00, Const)
-            sv = (s00::DataType).parameters[1]
-        else
+    if isa(s00, Const) || isconstType(s00) || isa(s00, PartialStruct)
+        if isa(s00, Const)
             sv = s00.val
+            sty = typeof(sv)
+            nflds = nfields(sv)
+            ismod = sv isa Module
+        elseif isa(s00, PartialStruct)
+            sty = unwrap_unionall(s00.typ)
+            nflds = fieldcount_noerror(sty)
+            ismod = false
+        else
+            sv = (s00::DataType).parameters[1]
+            sty = typeof(sv)
+            nflds = nfields(sv)
+            ismod = sv isa Module
         end
         if isa(name, Const)
             nval = name.val
             if !isa(nval, Symbol)
-                isa(sv, Module) && return false
+                ismod && return false
                 isa(nval, Int) || return false
             end
             return isdefined_tfunc(𝕃, s00, name) === Const(true)
         end
-        boundscheck && return false
+
         # If bounds checking is disabled and all fields are assigned,
         # we may assume that we don't throw
-        isa(sv, Module) && return false
+        @assert !boundscheck
+        ismod && return false
         name ⊑ Int || name ⊑ Symbol || return false
-        typeof(sv).name.n_uninitialized == 0 && return true
-        for i = (datatype_min_ninitialized(typeof(sv)) + 1):nfields(sv)
+        sty.name.n_uninitialized == 0 && return true
+        nflds === nothing && return false
+        for i = (datatype_min_ninitialized(sty)+1):nflds
             isdefined_tfunc(𝕃, s00, Const(i)) === Const(true) || return false
         end
         return true
@@ -1355,10 +1383,10 @@ end
 
     nargs = length(argtypes)
     if !isempty(argtypes) && isvarargtype(argtypes[nargs])
-        nargs - 1 <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-        nargs + 1 >= op_argi || return CallMeta(Any, Any, Effects(), NoCallInfo())
+        nargs - 1 <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+        nargs + 1 >= op_argi || return Future(CallMeta(Any, Any, Effects(), NoCallInfo()))
     else
-        minargs <= nargs <= maxargs || return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
+        minargs <= nargs <= maxargs || return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
     end
     𝕃ᵢ = typeinf_lattice(interp)
     if ff === modifyfield!
@@ -1389,15 +1417,22 @@ end
         op = unwrapva(argtypes[op_argi])
         v = unwrapva(argtypes[v_argi])
         callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), StmtInfo(true), sv, #=max_methods=#1)
-        TF2 = tmeet(callinfo.rt, widenconst(TF))
-        if TF2 === Bottom
-            RT = Bottom
-        elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
-            RT = PartialStruct(RT, Any[TF, TF2])
+        TF = Core.Box(TF)
+        RT = Core.Box(RT)
+        return Future{CallMeta}(callinfo, interp, sv) do callinfo, interp, sv
+            TF = TF.contents
+            RT = RT.contents
+            TF2 = tmeet(callinfo.rt, widenconst(TF))
+            if TF2 === Bottom
+                RT = Bottom
+            elseif isconcretetype(RT) && has_nontrivial_extended_info(𝕃ᵢ, TF2) # isconcrete condition required to form a PartialStruct
+                RT = PartialStruct(RT, Any[TF, TF2])
+            end
+            info = ModifyOpInfo(callinfo.info)
+            return CallMeta(RT, Any, Effects(), info)
         end
-        info = ModifyOpInfo(callinfo.info)
     end
-    return CallMeta(RT, Any, Effects(), info)
+    return Future(CallMeta(RT, Any, Effects(), info))
 end
 
 # we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
@@ -2286,6 +2321,7 @@ const _CONSISTENT_BUILTINS = Any[
     (<:),
     typeassert,
     throw,
+    Core.throw_methoderror,
     setfield!,
     donotdelete
 ]
@@ -2308,6 +2344,7 @@ const _EFFECT_FREE_BUILTINS = [
     (<:),
     typeassert,
     throw,
+    Core.throw_methoderror,
     getglobal,
     compilerbarrier,
 ]
@@ -2323,6 +2360,7 @@ const _INACCESSIBLEMEM_BUILTINS = Any[
     isa,
     nfields,
     throw,
+    Core.throw_methoderror,
     tuple,
     typeassert,
     typeof,
@@ -2862,19 +2900,19 @@ end
 # since abstract_call_gf_by_type is a very inaccurate model of _method and of typeinf_type,
 # while this assumes that it is an absolutely precise and accurate and exact model of both
 function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, si::StmtInfo, sv::AbsIntState)
-    UNKNOWN = CallMeta(Type, Any, EFFECTS_THROWS, NoCallInfo())
+    UNKNOWN = CallMeta(Type, Any, Effects(EFFECTS_THROWS; nortcall=false), NoCallInfo())
     if !(2 <= length(argtypes) <= 3)
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     tt = widenslotwrapper(argtypes[end])
     if !isa(tt, Const) && !(isType(tt) && !has_free_typevars(tt))
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
     if !isa(af_argtype, DataType) || !(af_argtype <: Tuple)
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
     if length(argtypes) == 3
@@ -2887,11 +2925,15 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
     end
     if !(isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
             (isconcretetype(aft) && !(aft <: Builtin) && !iskindtype(aft)))
-        return UNKNOWN
+        return Future(UNKNOWN)
     end
 
+    # effects are not an issue if we know this statement will get removed, but if it does not get removed,
+    # then this could be recursively re-entering inference (via concrete-eval), which will not terminate
+    RT_CALL_EFFECTS = Effects(EFFECTS_TOTAL; nortcall=false)
+
     if contains_is(argtypes_vec, Union{})
-        return CallMeta(Const(Union{}), Union{}, EFFECTS_TOTAL, NoCallInfo())
+        return Future(CallMeta(Const(Union{}), Union{}, RT_CALL_EFFECTS, NoCallInfo()))
     end
 
     # Run the abstract_call without restricting abstract call
@@ -2900,42 +2942,45 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
     if isa(sv, InferenceState)
         old_restrict = sv.restrict_abstract_call_sites
         sv.restrict_abstract_call_sites = false
-        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
-        sv.restrict_abstract_call_sites = old_restrict
-    else
-        call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
-    end
-    info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
-    rt = widenslotwrapper(call.rt)
-    if isa(rt, Const)
-        # output was computed to be constant
-        return CallMeta(Const(typeof(rt.val)), Union{}, EFFECTS_TOTAL, info)
-    end
-    rt = widenconst(rt)
-    if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
-        # output cannot be improved so it is known for certain
-        return CallMeta(Const(rt), Union{}, EFFECTS_TOTAL, info)
-    elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
-        # conservatively express uncertainty of this result
-        # in two ways: both as being a subtype of this, and
-        # because of LimitedAccuracy causes
-        return CallMeta(Type{<:rt}, Union{}, EFFECTS_TOTAL, info)
-    elseif isa(tt, Const) || isconstType(tt)
-        # input arguments were known for certain
-        # XXX: this doesn't imply we know anything about rt
-        return CallMeta(Const(rt), Union{}, EFFECTS_TOTAL, info)
-    elseif isType(rt)
-        return CallMeta(Type{rt}, Union{}, EFFECTS_TOTAL, info)
-    else
-        return CallMeta(Type{<:rt}, Union{}, EFFECTS_TOTAL, info)
+    end
+    call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), si, sv, #=max_methods=#-1)
+    tt = Core.Box(tt)
+    return Future{CallMeta}(call, interp, sv) do call, interp, sv
+        if isa(sv, InferenceState)
+            sv.restrict_abstract_call_sites = old_restrict
+        end
+        info = verbose_stmt_info(interp) ? MethodResultPure(ReturnTypeCallInfo(call.info)) : MethodResultPure()
+        rt = widenslotwrapper(call.rt)
+        if isa(rt, Const)
+            # output was computed to be constant
+            return CallMeta(Const(typeof(rt.val)), Union{}, RT_CALL_EFFECTS, info)
+        end
+        rt = widenconst(rt)
+        if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
+            # output cannot be improved so it is known for certain
+            return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+        elseif isa(sv, InferenceState) && !isempty(sv.pclimitations)
+            # conservatively express uncertainty of this result
+            # in two ways: both as being a subtype of this, and
+            # because of LimitedAccuracy causes
+            return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+        elseif isa(tt.contents, Const) || isconstType(tt.contents)
+            # input arguments were known for certain
+            # XXX: this doesn't imply we know anything about rt
+            return CallMeta(Const(rt), Union{}, RT_CALL_EFFECTS, info)
+        elseif isType(rt)
+            return CallMeta(Type{rt}, Union{}, RT_CALL_EFFECTS, info)
+        else
+            return CallMeta(Type{<:rt}, Union{}, RT_CALL_EFFECTS, info)
+        end
     end
 end
 
 # a simplified model of abstract_call_gf_by_type for applicable
 function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
                              sv::AbsIntState, max_methods::Int)
-    length(argtypes) < 2 && return CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo())
-    isvarargtype(argtypes[2]) && return CallMeta(Bool, Any, EFFECTS_UNKNOWN, NoCallInfo())
+    length(argtypes) < 2 && return Future(CallMeta(Bottom, Any, EFFECTS_THROWS, NoCallInfo()))
+    isvarargtype(argtypes[2]) && return Future(CallMeta(Bool, Any, EFFECTS_THROWS, NoCallInfo()))
     argtypes = argtypes[2:end]
     atype = argtypes_to_type(argtypes)
     matches = find_method_matches(interp, argtypes, atype; max_methods)
@@ -2944,36 +2989,25 @@ function abstract_applicable(interp::AbstractInterpreter, argtypes::Vector{Any},
     else
         (; valid_worlds, applicable) = matches
         update_valid_age!(sv, valid_worlds)
-
-        # also need an edge to the method table in case something gets
-        # added that did not intersect with any existing method
-        if isa(matches, MethodMatches)
-            matches.fullmatch || add_mt_backedge!(sv, matches.mt, atype)
-        else
-            for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
-                thisfullmatch || add_mt_backedge!(sv, mt, atype)
-            end
-        end
-
         napplicable = length(applicable)
         if napplicable == 0
             rt = Const(false) # never any matches
+        elseif !fully_covering(matches) || any_ambig(matches)
+            # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+            rt = Bool
         else
             rt = Const(true) # has applicable matches
-            for i in 1:napplicable
-                match = applicable[i]::MethodMatch
-                edge = specialize_method(match)::MethodInstance
-                add_backedge!(sv, edge)
-            end
-
-            if isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
-                    (!all(matches.fullmatches) || any_ambig(matches))
-                # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
-                rt = Bool
-            end
         end
+        for i in 1:napplicable
+            match = applicable[i]::MethodMatch
+            edge = specialize_method(match)::MethodInstance
+            add_backedge!(sv, edge)
+        end
+        # also need an edge to the method table in case something gets
+        # added that did not intersect with any existing method
+        add_uncovered_edges!(sv, matches, atype)
     end
-    return CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo())
+    return Future(CallMeta(rt, Union{}, EFFECTS_TOTAL, NoCallInfo()))
 end
 add_tfunc(applicable, 1, INT_INF, @nospecs((𝕃::AbstractLattice, f, args...)->Bool), 40)
 
@@ -3160,6 +3194,11 @@ function foreigncall_effects(@specialize(abstract_eval), e::Expr)
     elseif name === :jl_genericmemory_copy_slice
         return Effects(EFFECTS_TOTAL; consistent=CONSISTENT_IF_NOTRETURNED, nothrow=false)
     end
+    # `:foreigncall` can potentially perform all sorts of operations, including calling
+    # overlay methods, but the `:foreigncall` itself is not dispatched, and there is no
+    # concern that the method calls that potentially occur within the `:foreigncall` will
+    # be executed using the wrong method table due to concrete evaluation, so using
+    # `EFFECTS_UNKNOWN` here and not tainting with `:nonoverlayed` is fine
     return EFFECTS_UNKNOWN
 end
 
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 77547a5b30877..8b85f7c6f35f1 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -56,7 +56,7 @@ end
 Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
 Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
 
-_time_ns() = ccall(:jl_hrtime, UInt64, ())  # Re-implemented here because Base not yet available.
+_time_ns() = ccall(:jl_hrtime, UInt64, ())
 
 # We keep a stack of the Timings for each of the MethodInstances currently being timed.
 # Since type inference currently operates via a depth-first search (during abstract
@@ -77,114 +77,14 @@ const ROOTmi = Core.Compiler.specialize_method(
 Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
 start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
 """
-function reset_timings()
-    empty!(_timings)
-    push!(_timings, Timing(
-        # The MethodInstance for ROOT(), and default empty values for other fields.
-        InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
-        _time_ns()))
-    return nothing
-end
-reset_timings()
-
-# (This is split into a function so that it can be called both in this module, at the top
-# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
-# the operation and called `reset_timings()`.)
-# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between
-# timer manipulations and the tasks we're timing.
-@inline function close_current_timer()
-    stop_time = _time_ns()
-    parent_timer = _timings[end]
-    accum_time = stop_time - parent_timer.cur_start_time
-
-    # Add in accum_time ("modify" the immutable struct)
-    @inbounds begin
-        _timings[end] = Timing(
-            parent_timer.mi_info,
-            parent_timer.start_time,
-            parent_timer.cur_start_time,
-            parent_timer.time + accum_time,
-            parent_timer.children,
-            parent_timer.bt,
-        )
-    end
-    return nothing
-end
-
-@inline function enter_new_timer(frame)
-    # Very first thing, stop the active timer: get the current time and add in the
-    # time since it was last started to its aggregate exclusive time.
-    close_current_timer()
-
-    mi_info = _typeinf_identifier(frame)
-
-    # Start the new timer right before returning
-    push!(_timings, Timing(mi_info, UInt64(0)))
-    len = length(_timings)
-    new_timer = @inbounds _timings[len]
-    # Set the current time _after_ appending the node, to try to exclude the
-    # overhead from measurement.
-    start = _time_ns()
-
-    @inbounds begin
-        _timings[len] = Timing(
-            new_timer.mi_info,
-            start,
-            start,
-            new_timer.time,
-            new_timer.children,
-        )
-    end
-
-    return nothing
-end
-
-# _expected_frame_ is not needed within this function; it is used in the `@assert`, to
-# assert that indeed we are always returning to a parent after finishing all of its
-# children (that is, asserting that inference proceeds via depth-first-search).
-@inline function exit_current_timer(_expected_frame_)
-    # Finish the new timer
-    stop_time = _time_ns()
-
-    expected_mi_info = _typeinf_identifier(_expected_frame_)
-
-    # Grab the new timer again because it might have been modified in _timings
-    # (since it's an immutable struct)
-    # And remove it from the current timings stack
-    new_timer = pop!(_timings)
-    Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi
-
-    # Prepare to unwind one level of the stack and record in the parent
-    parent_timer = _timings[end]
-
-    accum_time = stop_time - new_timer.cur_start_time
-    # Add in accum_time ("modify" the immutable struct)
-    new_timer = Timing(
-        new_timer.mi_info,
-        new_timer.start_time,
-        new_timer.cur_start_time,
-        new_timer.time + accum_time,
-        new_timer.children,
-        parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing,
-    )
-    # Record the final timing with the original parent timer
-    push!(parent_timer.children, new_timer)
-
-    # And finally restart the parent timer:
-    len = length(_timings)
-    @inbounds begin
-        _timings[len] = Timing(
-            parent_timer.mi_info,
-            parent_timer.start_time,
-            _time_ns(),
-            parent_timer.time,
-            parent_timer.children,
-            parent_timer.bt,
-        )
-    end
-
-    return nothing
-end
+function reset_timings() end
+push!(_timings, Timing(
+    # The MethodInstance for ROOT(), and default empty values for other fields.
+    InferenceFrameInfo(ROOTmi, 0x0, Core.Compiler.VarState[], Any[Core.Const(ROOT)], 1),
+    _time_ns()))
+function close_current_timer() end
+function enter_new_timer(frame) end
+function exit_current_timer(_expected_frame_) end
 
 end  # module Timings
 
@@ -194,19 +94,7 @@ end  # module Timings
 If set to `true`, record per-method-instance timings within type inference in the Compiler.
 """
 __set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
-const __measure_typeinf__ = fill(false)
-
-# Wrapper around `_typeinf` that optionally records the exclusive time for each invocation.
-function typeinf(interp::AbstractInterpreter, frame::InferenceState)
-    if __measure_typeinf__[]
-        Timings.enter_new_timer(frame)
-        v = _typeinf(interp, frame)
-        Timings.exit_current_timer(frame)
-        return v
-    else
-        return _typeinf(interp, frame)
-    end
-end
+const __measure_typeinf__ = RefValue{Bool}(false)
 
 function finish!(interp::AbstractInterpreter, caller::InferenceState;
                  can_discard_trees::Bool=may_discard_trees(interp))
@@ -258,39 +146,27 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState;
     return nothing
 end
 
-function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
-    typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
-    # with no active ip's, frame is done
-    frames = frame.callers_in_cycle
-    if isempty(frames)
-        finish_nocycle(interp, frame)
-    elseif length(frames) == 1
-        @assert frames[1] === frame "invalid callers_in_cycle"
-        finish_nocycle(interp, frame)
-    else
-        finish_cycle(interp, frames)
-    end
-    empty!(frames)
-    return true
-end
-
 function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
-    frame.dont_work_on_me = true
     finishinfer!(frame, frame.interp)
     opt = frame.result.src
     if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
         optimize(frame.interp, opt, frame.result)
     end
     finish!(frame.interp, frame)
+    if frame.cycleid != 0
+        frames = frame.callstack::Vector{AbsIntState}
+        @assert frames[end] === frame
+        pop!(frames)
+    end
     return nothing
 end
 
-function finish_cycle(::AbstractInterpreter, frames::Vector{InferenceState})
+function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cycleid::Int)
     cycle_valid_worlds = WorldRange()
     cycle_valid_effects = EFFECTS_TOTAL
-    for caller in frames
-        @assert !(caller.dont_work_on_me)
-        caller.dont_work_on_me = true
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
+        @assert caller.cycleid == cycleid
         # converge the world age range and effects for this cycle here:
         # all frames in the cycle should have the same bits of `valid_worlds` and `effects`
         # that are simply the intersection of each partial computation, without having
@@ -298,19 +174,23 @@ function finish_cycle(::AbstractInterpreter, frames::Vector{InferenceState})
         cycle_valid_worlds = intersect(cycle_valid_worlds, caller.valid_worlds)
         cycle_valid_effects = merge_effects(cycle_valid_effects, caller.ipo_effects)
     end
-    for caller in frames
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
         adjust_cycle_frame!(caller, cycle_valid_worlds, cycle_valid_effects)
         finishinfer!(caller, caller.interp)
     end
-    for caller in frames
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
         opt = caller.result.src
         if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
             optimize(caller.interp, opt, caller.result)
         end
     end
-    for caller in frames
+    for caller in cycleid:length(frames)
+        caller = frames[caller]::InferenceState
         finish!(caller.interp, caller)
     end
+    resize!(frames, cycleid - 1)
     return nothing
 end
 
@@ -396,9 +276,9 @@ end
 
 function cycle_fix_limited(@nospecialize(typ), sv::InferenceState)
     if typ isa LimitedAccuracy
-        if sv.parent === nothing
+        if sv.parentid === 0
             # we might have introduced a limit marker, but we should know it must be sv and other callers_in_cycle
-            #@assert !isempty(sv.callers_in_cycle)
+            #@assert !isempty(callers_in_cycle(sv))
             #  FIXME: this assert fails, appearing to indicate there is a bug in filtering this list earlier.
             #  In particular (during doctests for example), during inference of
             #  show(Base.IOContext{Base.GenericIOBuffer{Memory{UInt8}}}, Base.Multimedia.MIME{:var"text/plain"}, LinearAlgebra.BunchKaufman{Float64, Array{Float64, 2}, Array{Int64, 1}})
@@ -407,7 +287,7 @@ function cycle_fix_limited(@nospecialize(typ), sv::InferenceState)
         end
         causes = copy(typ.causes)
         delete!(causes, sv)
-        for caller in sv.callers_in_cycle
+        for caller in callers_in_cycle(sv)
             delete!(causes, caller)
         end
         if isempty(causes)
@@ -449,6 +329,9 @@ function adjust_effects(ipo_effects::Effects, def::Method)
     if is_effect_overridden(override, :consistent_overlay)
         ipo_effects = Effects(ipo_effects; nonoverlayed=CONSISTENT_OVERLAY)
     end
+    if is_effect_overridden(override, :nortcall)
+        ipo_effects = Effects(ipo_effects; nortcall=true)
+    end
     return ipo_effects
 end
 
@@ -518,6 +401,7 @@ end
 # update the MethodInstance
 function finishinfer!(me::InferenceState, interp::AbstractInterpreter)
     # prepare to run optimization passes on fulltree
+    @assert isempty(me.ip)
     s_edges = get_stmt_edges!(me, 1)
     for i = 2:length(me.stmt_edges)
         isassigned(me.stmt_edges, i) || continue
@@ -538,7 +422,7 @@ function finishinfer!(me::InferenceState, interp::AbstractInterpreter)
         gt = me.ssavaluetypes
         for j = 1:length(gt)
             gt[j] = gtj = cycle_fix_limited(gt[j], me)
-            if gtj isa LimitedAccuracy && me.parent !== nothing
+            if gtj isa LimitedAccuracy && me.parentid != 0
                 limited_src = true
                 break
             end
@@ -570,10 +454,10 @@ function finishinfer!(me::InferenceState, interp::AbstractInterpreter)
         type_annotate!(interp, me)
         mayopt = may_optimize(interp)
         doopt = mayopt &&
-                # disable optimization if we don't use this later
-                (me.cache_mode != CACHE_MODE_NULL || me.parent !== nothing) &&
+                # disable optimization if we don't use this later (because it is not cached)
+                me.cache_mode != CACHE_MODE_NULL &&
                 # disable optimization if we've already obtained very accurate result
-                !result_is_constabi(interp, result, mayopt)
+                !result_is_constabi(interp, result)
         if doopt
             result.src = OptimizationState(me, interp)
         else
@@ -743,41 +627,24 @@ function type_annotate!(interp::AbstractInterpreter, sv::InferenceState)
     return nothing
 end
 
-# at the end, all items in b's cycle
-# will now be added to a's cycle
-function union_caller_cycle!(a::InferenceState, b::InferenceState)
-    callers_in_cycle = b.callers_in_cycle
-    b.parent = a.parent
-    b.callers_in_cycle = a.callers_in_cycle
-    contains_is(a.callers_in_cycle, b) || push!(a.callers_in_cycle, b)
-    if callers_in_cycle !== a.callers_in_cycle
-        for caller in callers_in_cycle
-            if caller !== b
-                caller.parent = a.parent
-                caller.callers_in_cycle = a.callers_in_cycle
-                push!(a.callers_in_cycle, caller)
-            end
-        end
-    end
-    return
-end
-
-function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, ancestor::InferenceState, child::InferenceState)
+function merge_call_chain!(interp::AbstractInterpreter, parent::InferenceState, child::InferenceState)
     # add backedge of parent <- child
     # then add all backedges of parent <- parent.parent
-    # and merge all of the callers into ancestor.callers_in_cycle
-    # and ensure that walking the parent list will get the same result (DAG) from everywhere
+    frames = parent.callstack::Vector{AbsIntState}
+    @assert child.callstack === frames
+    ancestorid = child.cycleid
     while true
         add_cycle_backedge!(parent, child)
-        union_caller_cycle!(ancestor, child)
+        parent.cycleid === ancestorid && break
         child = parent
-        child === ancestor && break
-        parent = frame_parent(child)
-        while !isa(parent, InferenceState)
-            # XXX we may miss some edges here?
-            parent = frame_parent(parent::IRInterpretationState)
-        end
-        parent = parent::InferenceState
+        parent = frame_parent(child)::InferenceState
+    end
+    # ensure that walking the callstack has the same cycleid (DAG)
+    for frame = reverse(ancestorid:length(frames))
+        frame = frames[frame]::InferenceState
+        frame.cycleid == ancestorid && break
+        @assert frame.cycleid > ancestorid
+        frame.cycleid = ancestorid
     end
 end
 
@@ -793,19 +660,20 @@ end
 # Walk through `mi`'s upstream call chain, starting at `parent`. If a parent
 # frame matching `mi` is encountered, then there is a cycle in the call graph
 # (i.e. `mi` is a descendant callee of itself). Upon encountering this cycle,
-# we "resolve" it by merging the call chain, which entails unioning each intermediary
-# frame's `callers_in_cycle` field and adding the appropriate backedges. Finally,
+# we "resolve" it by merging the call chain, which entails updating each intermediary
+# frame's `cycleid` field and adding the appropriate backedges. Finally,
 # we return `mi`'s pre-existing frame. If no cycles are found, `nothing` is
 # returned instead.
 function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, parent::AbsIntState)
     # TODO (#48913) implement a proper recursion handling for irinterp:
-    # This works just because currently the `:terminate` condition guarantees that
-    # irinterp doesn't fail into unresolved cycles, but it's not a good solution.
-    # We should revisit this once we have a better story for handling cycles in irinterp.
-    isa(parent, InferenceState) || return false
-    frame = parent
+    # This works currently just because the irinterp code doesn't get used much with
+    # `@assume_effects`, so it never sees a cycle normally, but that may not be a sustainable solution.
+    parent isa InferenceState || return false
+    frames = parent.callstack::Vector{AbsIntState}
     uncached = false
-    while isa(frame, InferenceState)
+    for frame = reverse(1:length(frames))
+        frame = frames[frame]
+        isa(frame, InferenceState) || break
         uncached |= !is_cached(frame) # ensure we never add an uncached frame to a cycle
         if is_same_frame(interp, mi, frame)
             if uncached
@@ -815,20 +683,9 @@ function resolve_call_cycle!(interp::AbstractInterpreter, mi::MethodInstance, pa
                 poison_callstack!(parent, frame)
                 return true
             end
-            merge_call_chain!(interp, parent, frame, frame)
+            merge_call_chain!(interp, parent, frame)
             return frame
         end
-        for caller in callers_in_cycle(frame)
-            if is_same_frame(interp, mi, caller)
-                if uncached
-                    poison_callstack!(parent, frame)
-                    return true
-                end
-                merge_call_chain!(interp, parent, frame, caller)
-                return caller
-            end
-        end
-        frame = frame_parent(frame)
     end
     return false
 end
@@ -850,15 +707,43 @@ struct EdgeCallResult
 end
 
 # return cached result of regular inference
-function return_cached_result(::AbstractInterpreter, codeinst::CodeInstance, caller::AbsIntState)
+function return_cached_result(interp::AbstractInterpreter, method::Method, codeinst::CodeInstance, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
     rt = cached_return_type(codeinst)
     effects = ipo_effects(codeinst)
     update_valid_age!(caller, WorldRange(min_world(codeinst), max_world(codeinst)))
-    return EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects)
+    return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(rt, codeinst.exctype, codeinst.def, effects), edgecycle, edgelimited))
+end
+
+function EdgeCall_to_MethodCall_Result(interp::AbstractInterpreter, sv::AbsIntState, method::Method, result::EdgeCallResult, edgecycle::Bool, edgelimited::Bool)
+    (; rt, exct, edge, effects, volatile_inf_result) = result
+
+    if edge === nothing
+        edgecycle = edgelimited = true
+    end
+
+    # we look for the termination effect override here as well, since the :terminates effect
+    # may have been tainted due to recursion at this point even if it's overridden
+    if is_effect_overridden(sv, :terminates_globally)
+        # this frame is known to terminate
+        effects = Effects(effects, terminates=true)
+    elseif is_effect_overridden(method, :terminates_globally)
+        # this edge is known to terminate
+        effects = Effects(effects; terminates=true)
+    elseif edgecycle
+        # Some sort of recursion was detected.
+        if edge !== nothing && !edgelimited && !is_edge_recursed(edge, sv)
+            # no `MethodInstance` cycles -- don't taint :terminate
+        else
+            # we cannot guarantee that the call will terminate
+            effects = Effects(effects; terminates=false)
+        end
+    end
+
+    return MethodCallResult(rt, exct, edgecycle, edgelimited, edge, effects, volatile_inf_result)
 end
 
 # compute (and cache) an inferred AST and return the current best estimate of the result type
-function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState)
+function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::AbsIntState, edgecycle::Bool, edgelimited::Bool)
     mi = specialize_method(method, atype, sparams)::MethodInstance
     cache_mode = CACHE_MODE_GLOBAL # cache edge targets globally by default
     force_inline = is_stmt_inline(get_curr_ssaflag(caller))
@@ -872,13 +757,13 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
                 cache_mode = CACHE_MODE_VOLATILE
             else
                 @assert codeinst.def === mi "MethodInstance for cached edge does not match"
-                return return_cached_result(interp, codeinst, caller)
+                return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited)
             end
         end
     end
     if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_output(#=incremental=#false)
-        add_remark!(interp, caller, "Inference is disabled for the target module")
-        return EdgeCallResult(Any, Any, nothing, Effects())
+        add_remark!(interp, caller, "[typeinf_edge] Inference is disabled for the target module")
+        return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
     end
     if !is_cached(caller) && frame_parent(caller) === nothing
         # this caller exists to return to the user
@@ -899,7 +784,7 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
                         cache_mode = CACHE_MODE_VOLATILE
                     else
                         @assert codeinst.def === mi "MethodInstance for cached edge does not match"
-                        return return_cached_result(interp, codeinst, caller)
+                        return return_cached_result(interp, method, codeinst, caller, edgecycle, edgelimited)
                     end
                 end
             end
@@ -910,37 +795,45 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         end
         frame = InferenceState(result, cache_mode, interp) # always use the cache for edge targets
         if frame === nothing
-            add_remark!(interp, caller, "Failed to retrieve source")
+            add_remark!(interp, caller, "[typeinf_edge] Failed to retrieve source")
             # can't get the source for this, so we know nothing
             if cache_mode == CACHE_MODE_GLOBAL
                 engine_reject(interp, ci)
             end
-            return EdgeCallResult(Any, Any, nothing, Effects())
+            return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
         end
-        if is_cached(caller) || frame_parent(caller) !== nothing # don't involve uncached functions in cycle resolution
-            frame.parent = caller
+        assign_parentchild!(frame, caller)
+        # the actual inference task for this edge is going to be scheduled within `typeinf_local` via the callstack queue
+        # while splitting off the rest of the work for this caller into a separate workq thunk
+        let mresult = Future{MethodCallResult}()
+            push!(caller.tasks, function get_infer_result(interp, caller)
+                update_valid_age!(caller, frame.valid_worlds)
+                local isinferred = is_inferred(frame)
+                local edge = isinferred ? mi : nothing
+                local effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects
+                    adjust_effects(effects_for_cycle(frame.ipo_effects), method)
+                local exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
+                # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
+                # note that this result is cached globally exclusively, so we can use this local result destructively
+                local volatile_inf_result = isinferred ? VolatileInferenceResult(result) : nothing
+                local edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result)
+                mresult[] = EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited)
+                return true
+            end)
+            return mresult
         end
-        typeinf(interp, frame)
-        update_valid_age!(caller, frame.valid_worlds)
-        isinferred = is_inferred(frame)
-        edge = isinferred ? mi : nothing
-        effects = isinferred ? frame.result.ipo_effects : # effects are adjusted already within `finish` for ipo_effects
-            adjust_effects(effects_for_cycle(frame.ipo_effects), method)
-        exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
-        # propagate newly inferred source to the inliner, allowing efficient inlining w/o deserialization:
-        # note that this result is cached globally exclusively, so we can use this local result destructively
-        volatile_inf_result = isinferred ? VolatileInferenceResult(result) : nothing
-        return EdgeCallResult(frame.bestguess, exc_bestguess, edge, effects, volatile_inf_result)
     elseif frame === true
         # unresolvable cycle
-        return EdgeCallResult(Any, Any, nothing, Effects())
+        add_remark!(interp, caller, "[typeinf_edge] Unresolvable cycle")
+        return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, EdgeCallResult(Any, Any, nothing, Effects()), edgecycle, edgelimited))
     end
     # return the current knowledge about this cycle
     frame = frame::InferenceState
     update_valid_age!(caller, frame.valid_worlds)
     effects = adjust_effects(effects_for_cycle(frame.ipo_effects), method)
     exc_bestguess = refine_exception_type(frame.exc_bestguess, effects)
-    return EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects)
+    edgeresult = EdgeCallResult(frame.bestguess, exc_bestguess, nothing, effects)
+    return Future(EdgeCall_to_MethodCall_Result(interp, caller, method, edgeresult, edgecycle, edgelimited))
 end
 
 # The `:terminates` effect bit must be conservatively tainted unless recursion cycle has
@@ -1008,9 +901,8 @@ function codeinstance_for_const_with_code(interp::AbstractInterpreter, code::Cod
         code.relocatability, src.debuginfo)
 end
 
-result_is_constabi(interp::AbstractInterpreter, result::InferenceResult,
-                   run_optimizer::Bool=may_optimize(interp)) =
-    run_optimizer && may_discard_trees(interp) && is_result_constabi_eligible(result)
+result_is_constabi(interp::AbstractInterpreter, result::InferenceResult) =
+    may_discard_trees(interp) && is_result_constabi_eligible(result)
 
 # compute an inferred AST and return type
 typeinf_code(interp::AbstractInterpreter, match::MethodMatch, run_optimizer::Bool) =
@@ -1021,11 +913,6 @@ typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype),
 function typeinf_code(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool)
     frame = typeinf_frame(interp, mi, run_optimizer)
     frame === nothing && return nothing
-    is_inferred(frame) || return nothing
-    if result_is_constabi(interp, frame.result, run_optimizer)
-        rt = frame.result.result::Const
-        return codeinfo_for_const(interp, frame.linfo, rt.val)
-    end
     return frame.src
 end
 
@@ -1048,17 +935,14 @@ typeinf_ircode(interp::AbstractInterpreter, method::Method, @nospecialize(atype)
     typeinf_ircode(interp, specialize_method(method, atype, sparams), optimize_until)
 function typeinf_ircode(interp::AbstractInterpreter, mi::MethodInstance,
                         optimize_until::Union{Integer,AbstractString,Nothing})
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
     frame = typeinf_frame(interp, mi, false)
     if frame === nothing
-        ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
         return nothing, Any
     end
     (; result) = frame
     opt = OptimizationState(frame, interp)
-    ir = run_passes_ipo_safe(opt.src, opt, result, optimize_until)
+    ir = run_passes_ipo_safe(opt.src, opt, optimize_until)
     rt = widenconst(ignorelimited(result.result))
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
     return ir, rt
 end
 
@@ -1069,13 +953,22 @@ typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype),
               run_optimizer::Bool) =
     typeinf_frame(interp, specialize_method(method, atype, sparams), run_optimizer)
 function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_optimizer::Bool)
-    start_time = ccall(:jl_typeinf_timing_begin, UInt64, ())
     result = InferenceResult(mi, typeinf_lattice(interp))
-    cache_mode = run_optimizer ? :global : :no
-    frame = InferenceState(result, cache_mode, interp)
+    frame = InferenceState(result, #=cache_mode=#:no, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
-    ccall(:jl_typeinf_timing_end, Cvoid, (UInt64,), start_time)
+    is_inferred(frame) || return nothing
+    if run_optimizer
+        if result_is_constabi(interp, frame.result)
+            rt = frame.result.result::Const
+            opt = codeinfo_for_const(interp, frame.linfo, rt.val)
+        else
+            opt = OptimizationState(frame, interp)
+            optimize(interp, opt, frame.result)
+            opt = ir_to_codeinf!(opt)
+        end
+        result.src = frame.src = opt
+    end
     return frame
 end
 
@@ -1118,7 +1011,7 @@ const SOURCE_MODE_FORCE_SOURCE = 0x2
 
 function ci_has_source(code::CodeInstance)
     inf = @atomic :monotonic code.inferred
-    return isa(inf, CodeInfo) || isa(inf, String)
+    return code.owner === nothing ? (isa(inf, CodeInfo) || isa(inf, String)) : inf !== nothing
 end
 
 """
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index 5987c30be2b91..86fa8af21615f 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -6,17 +6,42 @@
 
 # N.B.: Const/PartialStruct/InterConditional are defined in Core, to allow them to be used
 # inside the global code cache.
-#
-# # The type of a value might be constant
-# struct Const
-#     val
-# end
-#
-# struct PartialStruct
-#     typ
-#     fields::Vector{Any} # elements are other type lattice members
-# end
+
 import Core: Const, PartialStruct
+
+"""
+    struct Const
+        val
+    end
+
+The type representing a constant value.
+"""
+:(Const)
+
+"""
+    struct PartialStruct
+        typ
+        fields::Vector{Any} # elements are other type lattice members
+    end
+
+This extended lattice element is introduced when we have information about an object's
+fields beyond what can be obtained from the object type. E.g. it represents a tuple where
+some elements are known to be constants or a struct whose `Any`-typed field is initialized
+with `Int` values.
+
+- `typ` indicates the type of the object
+- `fields` holds the lattice elements corresponding to each field of the object
+
+If `typ` is a struct, `fields` represents the fields of the struct that are guaranteed to be
+initialized. For instance, if the length of `fields` of `PartialStruct` representing a
+struct with 4 fields is 3, the 4th field may not be initialized. If the length is 4, all
+fields are guaranteed to be initialized.
+
+If `typ` is a tuple, the last element of `fields` may be `Vararg`. In this case, it is
+guaranteed that the number of elements in the tuple is at least `length(fields)-1`, but the
+exact number of elements is unknown.
+"""
+:(PartialStruct)
 function PartialStruct(@nospecialize(typ), fields::Vector{Any})
     for i = 1:length(fields)
         assert_nested_slotwrapper(fields[i])
@@ -48,17 +73,27 @@ struct Conditional
     slot::Int
     thentype
     elsetype
-    function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype))
+    # `isdefined` indicates this `Conditional` is from `@isdefined slot`, implying that
+    # the `undef` information of `slot` can be improved in the then branch.
+    # Since this is only beneficial for local inference, it is not translated into `InterConditional`.
+    isdefined::Bool
+    function Conditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype);
+                         isdefined::Bool=false)
         assert_nested_slotwrapper(thentype)
         assert_nested_slotwrapper(elsetype)
-        return new(slot, thentype, elsetype)
+        return new(slot, thentype, elsetype, isdefined)
     end
 end
-Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
-    Conditional(slot_id(var), thentype, elsetype)
+Conditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype); isdefined::Bool=false) =
+    Conditional(slot_id(var), thentype, elsetype; isdefined)
 
+import Core: InterConditional
 """
-    cnd::InterConditional
+    struct InterConditional
+        slot::Int
+        thentype
+        elsetype
+    end
 
 Similar to `Conditional`, but conveys inter-procedural constraints imposed on call arguments.
 This is separate from `Conditional` to catch logic errors: the lattice element name is `InterConditional`
@@ -66,14 +101,6 @@ while processing a call, then `Conditional` everywhere else. Thus `InterConditio
 `CompilerTypes`—these type's usages are disjoint—though we define the lattice for `InterConditional`.
 """
 :(InterConditional)
-import Core: InterConditional
-# struct InterConditional
-#     slot::Int
-#     thentype
-#     elsetype
-#     InterConditional(slot::Int, @nospecialize(thentype), @nospecialize(elsetype)) =
-#         new(slot, thentype, elsetype)
-# end
 InterConditional(var::SlotNumber, @nospecialize(thentype), @nospecialize(elsetype)) =
     InterConditional(slot_id(var), thentype, elsetype)
 
@@ -120,8 +147,6 @@ end
 MustAlias(var::SlotNumber, @nospecialize(vartyp), fldidx::Int, @nospecialize(fldtyp)) =
     MustAlias(slot_id(var), vartyp, fldidx, fldtyp)
 
-_uniontypes(x::MustAlias, ts) = _uniontypes(widenconst(x), ts)
-
 """
     alias::InterMustAlias
 
@@ -159,8 +184,8 @@ end
 struct StateUpdate
     var::SlotNumber
     vtype::VarState
-    state::VarTable
     conditional::Bool
+    StateUpdate(var::SlotNumber, vtype::VarState, conditional::Bool=false) = new(var, vtype, conditional)
 end
 
 """
@@ -286,11 +311,17 @@ end
 
 # `Conditional` and `InterConditional` are valid in opposite contexts
 # (i.e. local inference and inter-procedural call), as such they will never be compared
-@nospecializeinfer function issubconditional(lattice::AbstractLattice, a::C, b::C) where {C<:AnyConditional}
+@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::Conditional, b::Conditional) =
+    _issubconditional(𝕃, a, b, #=check_isdefined=#true)
+@nospecializeinfer issubconditional(𝕃::AbstractLattice, a::InterConditional, b::InterConditional) =
+    _issubconditional(𝕃, a, b, #=check_isdefined=#false)
+@nospecializeinfer function _issubconditional(𝕃::AbstractLattice, a::C, b::C, check_isdefined::Bool) where C<:AnyConditional
     if is_same_conditionals(a, b)
-        if ⊑(lattice, a.thentype, b.thentype)
-            if ⊑(lattice, a.elsetype, b.elsetype)
-                return true
+        if ⊑(𝕃, a.thentype, b.thentype)
+            if ⊑(𝕃, a.elsetype, b.elsetype)
+                if !check_isdefined || a.isdefined ≥ b.isdefined
+                    return true
+                end
             end
         end
     end
@@ -394,8 +425,8 @@ ignorelimited(typ::LimitedAccuracy) = typ.typ
 # =============
 
 @nospecializeinfer function ⊑(lattice::InferenceLattice, @nospecialize(a), @nospecialize(b))
-    r = ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b))
-    r || return false
+    ⊑(widenlattice(lattice), ignorelimited(a), ignorelimited(b)) || return false
+
     isa(b, LimitedAccuracy) || return true
 
     # We've found that ignorelimited(a) ⊑ ignorelimited(b).
@@ -448,8 +479,13 @@ end
 @nospecializeinfer function ⊑(lattice::PartialsLattice, @nospecialize(a), @nospecialize(b))
     if isa(a, PartialStruct)
         if isa(b, PartialStruct)
-            if !(length(a.fields) == length(b.fields) && a.typ <: b.typ)
-                return false
+            a.typ <: b.typ || return false
+            if length(a.fields) ≠ length(b.fields)
+                if !(isvarargtype(a.fields[end]) || isvarargtype(b.fields[end]))
+                    length(a.fields) ≥ length(b.fields) || return false
+                else
+                    return false
+                end
             end
             for i in 1:length(b.fields)
                 af = a.fields[i]
@@ -472,19 +508,25 @@ end
         return isa(b, Type) && a.typ <: b
     elseif isa(b, PartialStruct)
         if isa(a, Const)
-            nf = nfields(a.val)
-            nf == length(b.fields) || return false
             widea = widenconst(a)::DataType
             wideb = widenconst(b)
             wideb′ = unwrap_unionall(wideb)::DataType
             widea.name === wideb′.name || return false
-            # We can skip the subtype check if b is a Tuple, since in that
-            # case, the ⊑ of the elements is sufficient.
-            if wideb′.name !== Tuple.name && !(widea <: wideb)
-                return false
+            if wideb′.name === Tuple.name
+                # We can skip the subtype check if b is a Tuple, since in that
+                # case, the ⊑ of the elements is sufficient.
+                # But for tuple comparisons, we need their lengths to be the same for now.
+                # TODO improve accuracy for cases when `b` contains vararg element
+                nfields(a.val) == length(b.fields) || return false
+            else
+                widea <: wideb || return false
+                # for structs we need to check that `a` has more information than `b` that may be partially initialized
+                n_initialized(a) ≥ length(b.fields) || return false
             end
+            nf = nfields(a.val)
             for i in 1:nf
                 isdefined(a.val, i) || continue # since ∀ T Union{} ⊑ T
+                i > length(b.fields) && break # `a` has more information than `b` that is partially initialized struct
                 bfᵢ = b.fields[i]
                 if i == nf
                     bfᵢ = unwrapva(bfᵢ)
@@ -724,28 +766,6 @@ function invalidate_slotwrapper(vt::VarState, changeid::Int, ignore_conditional:
     return nothing
 end
 
-function stupdate!(lattice::AbstractLattice, state::VarTable, changes::StateUpdate)
-    changed = false
-    changeid = slot_id(changes.var)
-    for i = 1:length(state)
-        if i == changeid
-            newtype = changes.vtype
-        else
-            newtype = changes.state[i]
-        end
-        invalidated = invalidate_slotwrapper(newtype, changeid, changes.conditional)
-        if invalidated !== nothing
-            newtype = invalidated
-        end
-        oldtype = state[i]
-        if schanged(lattice, newtype, oldtype)
-            state[i] = smerge(lattice, oldtype, newtype)
-            changed = true
-        end
-    end
-    return changed
-end
-
 function stupdate!(lattice::AbstractLattice, state::VarTable, changes::VarTable)
     changed = false
     for i = 1:length(state)
diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl
index 318ac0b5c27e5..3d0e5f3d0877d 100644
--- a/base/compiler/typelimits.jl
+++ b/base/compiler/typelimits.jl
@@ -321,6 +321,11 @@ end
 # even after complicated recursion and other operations on it elsewhere
 const issimpleenoughtupleelem = issimpleenoughtype
 
+function n_initialized(t::Const)
+    nf = nfields(t.val)
+    return something(findfirst(i::Int->!isdefined(t.val,i), 1:nf), nf+1)-1
+end
+
 # A simplified type_more_complex query over the extended lattice
 # (assumes typeb ⊑ typea)
 @nospecializeinfer function issimplertype(𝕃::AbstractLattice, @nospecialize(typea), @nospecialize(typeb))
@@ -328,6 +333,13 @@ const issimpleenoughtupleelem = issimpleenoughtype
     typea === typeb && return true
     if typea isa PartialStruct
         aty = widenconst(typea)
+        if typeb isa Const
+            @assert length(typea.fields) ≤ n_initialized(typeb) "typeb ⊑ typea is assumed"
+        elseif typeb isa PartialStruct
+            @assert length(typea.fields) ≤ length(typeb.fields) "typeb ⊑ typea is assumed"
+        else
+            return false
+        end
         for i = 1:length(typea.fields)
             ai = unwrapva(typea.fields[i])
             bi = fieldtype(aty, i)
@@ -572,34 +584,38 @@ end
 
 # N.B. This can also be called with both typea::Const and typeb::Const to
 # to recover PartialStruct from `Const`s with overlapping fields.
-@nospecializeinfer function tmerge_partial_struct(lattice::PartialsLattice, @nospecialize(typea), @nospecialize(typeb))
+@nospecializeinfer function tmerge_partial_struct(𝕃::PartialsLattice, @nospecialize(typea), @nospecialize(typeb))
     aty = widenconst(typea)
     bty = widenconst(typeb)
     if aty === bty
-        # must have egal here, since we do not create PartialStruct for non-concrete types
-        typea_nfields = nfields_tfunc(lattice, typea)
-        typeb_nfields = nfields_tfunc(lattice, typeb)
-        isa(typea_nfields, Const) || return nothing
-        isa(typeb_nfields, Const) || return nothing
-        type_nfields = typea_nfields.val::Int
-        type_nfields === typeb_nfields.val::Int || return nothing
-        type_nfields == 0 && return nothing
-        fields = Vector{Any}(undef, type_nfields)
-        anyrefine = false
-        for i = 1:type_nfields
-            ai = getfield_tfunc(lattice, typea, Const(i))
-            bi = getfield_tfunc(lattice, typeb, Const(i))
+        if typea isa PartialStruct
+            if typeb isa PartialStruct
+                nflds = min(length(typea.fields), length(typeb.fields))
+            else
+                nflds = min(length(typea.fields), n_initialized(typeb::Const))
+            end
+        elseif typeb isa PartialStruct
+            nflds = min(n_initialized(typea::Const), length(typeb.fields))
+        else
+            nflds = min(n_initialized(typea::Const), n_initialized(typeb::Const))
+        end
+        nflds == 0 && return nothing
+        fields = Vector{Any}(undef, nflds)
+        anyrefine = nflds > datatype_min_ninitialized(unwrap_unionall(aty))
+        for i = 1:nflds
+            ai = getfield_tfunc(𝕃, typea, Const(i))
+            bi = getfield_tfunc(𝕃, typeb, Const(i))
             # N.B.: We're assuming here that !isType(aty), because that case
             # only arises when typea === typeb, which should have been caught
             # before calling this.
             ft = fieldtype(aty, i)
-            if is_lattice_equal(lattice, ai, bi) || is_lattice_equal(lattice, ai, ft)
+            if is_lattice_equal(𝕃, ai, bi) || is_lattice_equal(𝕃, ai, ft)
                 # Since ai===bi, the given type has no restrictions on complexity.
                 # and can be used to refine ft
                 tyi = ai
-            elseif is_lattice_equal(lattice, bi, ft)
+            elseif is_lattice_equal(𝕃, bi, ft)
                 tyi = bi
-            elseif (tyi′ = tmerge_field(lattice, ai, bi); tyi′ !== nothing)
+            elseif (tyi′ = tmerge_field(𝕃, ai, bi); tyi′ !== nothing)
                 # allow external lattice implementation to provide a custom field-merge strategy
                 tyi = tyi′
             else
@@ -621,8 +637,8 @@ end
             end
             fields[i] = tyi
             if !anyrefine
-                anyrefine = has_nontrivial_extended_info(lattice, tyi) || # extended information
-                            ⋤(lattice, tyi, ft) # just a type-level information, but more precise than the declared type
+                anyrefine = has_nontrivial_extended_info(𝕃, tyi) || # extended information
+                            ⋤(𝕃, tyi, ft) # just a type-level information, but more precise than the declared type
             end
         end
         anyrefine && return PartialStruct(aty, fields)
@@ -815,6 +831,7 @@ end
                     typenames[i] = Any.name
                     simplify[i] = false
                     types[j] = widen
+                    typenames[j] = ijname
                     break
                 end
             end
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 7021601bf87cf..ecf2417fd6199 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -1,4 +1,12 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+#
+
+const WorkThunk = Any
+# #@eval struct WorkThunk
+#    thunk::Core.OpaqueClosure{Tuple{Vector{Tasks}}, Bool}
+#    WorkThunk(work) = new($(Expr(:opaque_closure, :(Tuple{Vector{Tasks}}), :Bool, :Bool, :((tasks) -> work(tasks))))) # @opaque Vector{Tasks}->Bool (tasks)->work(tasks)
+# end
+# (p::WorkThunk)() = p.thunk()
 
 """
     AbstractInterpreter
@@ -33,11 +41,14 @@ struct StmtInfo
     used::Bool
 end
 
-struct MethodInfo
+struct SpecInfo
+    nargs::Int
+    isva::Bool
     propagate_inbounds::Bool
     method_for_inference_limit_heuristics::Union{Nothing,Method}
 end
-MethodInfo(src::CodeInfo) = MethodInfo(
+SpecInfo(src::CodeInfo) = SpecInfo(
+    Int(src.nargs), src.isva,
     src.propagate_inbounds,
     src.method_for_inference_limit_heuristics::Union{Nothing,Method})
 
@@ -156,11 +167,6 @@ Parameters that control abstract interpretation-based type inference operation.
   information available. [`Base.@constprop :aggressive`](@ref Base.@constprop) can have a
   more fine-grained control on this configuration with per-method annotation basis.
 ---
-- `inf_params.unoptimize_throw_blocks::Bool = true`\\
-  If `true`, skips inferring calls that are in a block that is known to `throw`.
-  It may improve the compiler latency without sacrificing the runtime performance
-  in common situations.
----
 - `inf_params.assume_bindings_static::Bool = false`\\
   If `true`, assumes that no new bindings will be added, i.e. a non-existing binding at
   inference time can be assumed to always not exist at runtime (and thus e.g. any access to
@@ -176,7 +182,6 @@ struct InferenceParams
     tuple_complexity_limit_depth::Int
     ipo_constant_propagation::Bool
     aggressive_constant_propagation::Bool
-    unoptimize_throw_blocks::Bool
     assume_bindings_static::Bool
     ignore_recursion_hardlimit::Bool
 
@@ -188,7 +193,6 @@ struct InferenceParams
         tuple_complexity_limit_depth::Int,
         ipo_constant_propagation::Bool,
         aggressive_constant_propagation::Bool,
-        unoptimize_throw_blocks::Bool,
         assume_bindings_static::Bool,
         ignore_recursion_hardlimit::Bool)
         return new(
@@ -199,7 +203,6 @@ struct InferenceParams
             tuple_complexity_limit_depth,
             ipo_constant_propagation,
             aggressive_constant_propagation,
-            unoptimize_throw_blocks,
             assume_bindings_static,
             ignore_recursion_hardlimit)
     end
@@ -213,7 +216,6 @@ function InferenceParams(
         #=tuple_complexity_limit_depth::Int=# 3,
         #=ipo_constant_propagation::Bool=# true,
         #=aggressive_constant_propagation::Bool=# false,
-        #=unoptimize_throw_blocks::Bool=# BuildSettings.UNOPTIMIZE_THROW_BLOCKS,
         #=assume_bindings_static::Bool=# false,
         #=ignore_recursion_hardlimit::Bool=# false);
     max_methods::Int = params.max_methods,
@@ -223,7 +225,6 @@ function InferenceParams(
     tuple_complexity_limit_depth::Int = params.tuple_complexity_limit_depth,
     ipo_constant_propagation::Bool = params.ipo_constant_propagation,
     aggressive_constant_propagation::Bool = params.aggressive_constant_propagation,
-    unoptimize_throw_blocks::Bool = params.unoptimize_throw_blocks,
     assume_bindings_static::Bool = params.assume_bindings_static,
     ignore_recursion_hardlimit::Bool = params.ignore_recursion_hardlimit)
     return InferenceParams(
@@ -234,7 +235,6 @@ function InferenceParams(
         tuple_complexity_limit_depth,
         ipo_constant_propagation,
         aggressive_constant_propagation,
-        unoptimize_throw_blocks,
         assume_bindings_static,
         ignore_recursion_hardlimit)
 end
@@ -259,10 +259,6 @@ Parameters that control optimizer operation.
   tuple return types (in hopes of splitting it up). `opt_params.inline_tupleret_bonus` will
   be added to `opt_params.inline_cost_threshold` when making inlining decision.
 ---
-- `opt_params.inline_error_path_cost::Int = 20`\\
-  Specifies the penalty cost for an un-optimized dynamic call in a block that is known to
-  `throw`. See also [`(inf_params::InferenceParams).unoptimize_throw_blocks`](@ref InferenceParams).
----
 - `opt_params.max_tuple_splat::Int = 32`\\
   When attempting to inline `Core._apply_iterate`, abort the optimization if the tuple
   contains more than this many elements.
@@ -289,7 +285,6 @@ struct OptimizationParams
     inline_cost_threshold::Int
     inline_nonleaf_penalty::Int
     inline_tupleret_bonus::Int
-    inline_error_path_cost::Int
     max_tuple_splat::Int
     compilesig_invokes::Bool
     assume_fatal_throw::Bool
@@ -300,7 +295,6 @@ struct OptimizationParams
         inline_cost_threshold::Int,
         inline_nonleaf_penalty::Int,
         inline_tupleret_bonus::Int,
-        inline_error_path_cost::Int,
         max_tuple_splat::Int,
         compilesig_invokes::Bool,
         assume_fatal_throw::Bool,
@@ -310,7 +304,6 @@ struct OptimizationParams
             inline_cost_threshold,
             inline_nonleaf_penalty,
             inline_tupleret_bonus,
-            inline_error_path_cost,
             max_tuple_splat,
             compilesig_invokes,
             assume_fatal_throw,
@@ -323,7 +316,6 @@ function OptimizationParams(
         #=inline_cost_threshold::Int=# 100,
         #=inline_nonleaf_penalty::Int=# 1000,
         #=inline_tupleret_bonus::Int=# 250,
-        #=inline_error_path_cost::Int=# 20,
         #=max_tuple_splat::Int=# 32,
         #=compilesig_invokes::Bool=# true,
         #=assume_fatal_throw::Bool=# false,
@@ -332,7 +324,6 @@ function OptimizationParams(
     inline_cost_threshold::Int = params.inline_cost_threshold,
     inline_nonleaf_penalty::Int = params.inline_nonleaf_penalty,
     inline_tupleret_bonus::Int = params.inline_tupleret_bonus,
-    inline_error_path_cost::Int = params.inline_error_path_cost,
     max_tuple_splat::Int = params.max_tuple_splat,
     compilesig_invokes::Bool = params.compilesig_invokes,
     assume_fatal_throw::Bool = params.assume_fatal_throw,
@@ -342,7 +333,6 @@ function OptimizationParams(
         inline_cost_threshold,
         inline_nonleaf_penalty,
         inline_tupleret_bonus,
-        inline_error_path_cost,
         max_tuple_splat,
         compilesig_invokes,
         assume_fatal_throw,
@@ -471,10 +461,16 @@ abstract type CallInfo end
 
 nsplit(info::CallInfo) = nsplit_impl(info)::Union{Nothing,Int}
 getsplit(info::CallInfo, idx::Int) = getsplit_impl(info, idx)::MethodLookupResult
+add_uncovered_edges!(edges::Vector{Any}, info::CallInfo, @nospecialize(atype)) = add_uncovered_edges_impl(edges, info, atype)
+
 getresult(info::CallInfo, idx::Int) = getresult_impl(info, idx)
 
+# must implement `nsplit`, `getsplit`, and `add_uncovered_edges!` to opt in to inlining
 nsplit_impl(::CallInfo) = nothing
 getsplit_impl(::CallInfo, ::Int) = error("unexpected call into `getsplit`")
+add_uncovered_edges_impl(::Vector{Any}, ::CallInfo, _) = error("unexpected call into `add_uncovered_edges!`")
+
+# must implement `getresult` to opt in to extended lattice return information
 getresult_impl(::CallInfo, ::Int) = nothing
 
 @specialize
diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl
index a4499e003cf2c..577452a873b5e 100644
--- a/base/compiler/typeutils.jl
+++ b/base/compiler/typeutils.jl
@@ -18,7 +18,7 @@ function hasuniquerep(@nospecialize t)
     iskindtype(typeof(t)) || return true # non-types are always compared by egal in the type system
     isconcretetype(t) && return true # these are also interned and pointer comparable
     if isa(t, DataType) && t.name !== Tuple.name && !isvarargtype(t) # invariant DataTypes
-        return _all(hasuniquerep, t.parameters)
+        return all(hasuniquerep, t.parameters)
     end
     return false
 end
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index 527c6ab42eb2d..b3dfd73d53452 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -48,15 +48,6 @@ anymap(f::Function, a::Array{Any,1}) = Any[ f(a[i]) for i in 1:length(a) ]
 
 _topmod(m::Module) = ccall(:jl_base_relative_to, Any, (Any,), m)::Module
 
-function istopfunction(@nospecialize(f), name::Symbol)
-    tn = typeof(f).name
-    if tn.mt.name === name
-        top = _topmod(tn.module)
-        return isdefined(top, name) && isconst(top, name) && f === getglobal(top, name)
-    end
-    return false
-end
-
 #######
 # AST #
 #######
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index ba8e86eeb042c..78db5ef5e4ed8 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -22,6 +22,7 @@ const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :copyast => 1:1,
     :meta => 0:typemax(Int),
     :global => 1:1,
+    :globaldecl => 2:2,
     :foreigncall => 5:typemax(Int), # name, RT, AT, nreq, (cconv, effects), args..., roots...
     :cfunction => 5:5,
     :isdefined => 1:2,
@@ -256,7 +257,9 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast, :new_opaque_closure)
+    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call,
+        :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast,
+        :new_opaque_closure)
         return true
     end
     return false
diff --git a/base/complex.jl b/base/complex.jl
index 8ac126d2c6532..095c842795d38 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -1037,24 +1037,22 @@ end
 function atanh(z::Complex{T}) where T
     z = float(z)
     Tf = float(T)
-    Ω = prevfloat(typemax(Tf))
-    θ = sqrt(Ω)/4
-    ρ = 1/θ
     x, y = reim(z)
     ax = abs(x)
     ay = abs(y)
+    θ = sqrt(floatmax(Tf))/4
     if ax > θ || ay > θ #Prevent overflow
         if isnan(y)
             if isinf(x)
                 return Complex(copysign(zero(x),x), y)
             else
-                return Complex(real(1/z), y)
+                return Complex(real(inv(z)), y)
             end
         end
         if isinf(y)
             return Complex(copysign(zero(x),x), copysign(oftype(y,pi)/2, y))
         end
-        return Complex(real(1/z), copysign(oftype(y,pi)/2, y))
+        return Complex(real(inv(z)), copysign(oftype(y,pi)/2, y))
     end
     β = copysign(one(Tf), x)
     z *= β
@@ -1064,16 +1062,15 @@ function atanh(z::Complex{T}) where T
             ξ = oftype(x, Inf)
             η = y
         else
-            ym = ay+ρ
-            ξ = log(sqrt(sqrt(4+y*y))/sqrt(ym))
-            η = copysign(oftype(y,pi)/2 + atan(ym/2), y)/2
+            ξ = log(sqrt(sqrt(muladd(y, y, 4)))/sqrt(ay))
+            η = copysign(oftype(y,pi)/2 + atan(ay/2), y)/2
         end
     else #Normal case
-        ysq = (ay+ρ)^2
+        ysq = ay^2
         if x == 0
             ξ = x
         else
-            ξ = log1p(4x/((1-x)^2 + ysq))/4
+            ξ = log1p(4x/(muladd(1-x, 1-x, ysq)))/4
         end
         η = angle(Complex((1-x)*(1+x)-ysq, 2y))/2
     end
diff --git a/base/condition.jl b/base/condition.jl
index 52781f348eb0d..fd771c9be346a 100644
--- a/base/condition.jl
+++ b/base/condition.jl
@@ -69,6 +69,8 @@ struct GenericCondition{L<:AbstractLock}
     GenericCondition(l::AbstractLock) = new{typeof(l)}(IntrusiveLinkedList{Task}(), l)
 end
 
+show(io::IO, c::GenericCondition) = print(io, GenericCondition, "(", c.lock, ")")
+
 assert_havelock(c::GenericCondition) = assert_havelock(c.lock)
 lock(c::GenericCondition) = lock(c.lock)
 unlock(c::GenericCondition) = unlock(c.lock)
@@ -138,7 +140,7 @@ function wait(c::GenericCondition; first::Bool=false)
     try
         return wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         rethrow()
     finally
         relockall(c.lock, token)
@@ -194,6 +196,8 @@ This object is NOT thread-safe. See [`Threads.Condition`](@ref) for a thread-saf
 """
 const Condition = GenericCondition{AlwaysLockedST}
 
+show(io::IO, ::Condition) = print(io, Condition, "()")
+
 lock(c::GenericCondition{AlwaysLockedST}) =
     throw(ArgumentError("`Condition` is not thread-safe. Please use `Threads.Condition` instead for multi-threaded code."))
 unlock(c::GenericCondition{AlwaysLockedST}) =
diff --git a/base/deprecated.jl b/base/deprecated.jl
index b478995522a58..b43a4227d42c4 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -119,9 +119,7 @@ export __has_internal_change
 # and of exporting the function.
 #
 # For more complex cases, move the body of the deprecated method in this file,
-# and call depwarn() directly from inside it. The symbol depwarn() expects is
-# the name of the function, which is used to ensure that the deprecation warning
-# is only printed the first time for each call place.
+# and call depwarn() directly from inside it.
 
 """
     @deprecate old new [export_old=true]
@@ -131,6 +129,8 @@ with the specified signature in the process.
 
 To prevent `old` from being exported, set `export_old` to `false`.
 
+See also [`Base.depwarn()`](@ref).
+
 !!! compat "Julia 1.5"
     As of Julia 1.5, functions defined by `@deprecate` do not print warning when `julia`
     is run without the `--depwarn=yes` flag set, as the default value of `--depwarn` option
@@ -227,6 +227,26 @@ macro deprecate(old, new, export_old=true)
     end
 end
 
+"""
+    Base.depwarn(msg::String, funcsym::Symbol; force=false)
+
+Print `msg` as a deprecation warning. The symbol `funcsym` should be the name
+of the calling function, which is used to ensure that the deprecation warning is
+only printed the first time for each call place. Set `force=true` to force the
+warning to always be shown, even if Julia was started with `--depwarn=no` (the
+default).
+
+See also [`@deprecate`](@ref).
+
+# Examples
+```julia
+function deprecated_func()
+    Base.depwarn("Don't use `deprecated_func()`!", :deprecated_func)
+
+    1 + 1
+end
+```
+"""
 @nospecializeinfer function depwarn(msg, funcsym; force::Bool=false)
     @nospecialize
     # N.B. With this use of `@invokelatest`, we're preventing the addition of backedges from
@@ -412,7 +432,8 @@ const All16{T,N} = Tuple{T,T,T,T,T,T,T,T,
 
 # the plan is to eventually overload getproperty to access entries of the dict
 @noinline function getproperty(x::Pairs, s::Symbol)
-    depwarn("use values(kwargs) and keys(kwargs) instead of kwargs.data and kwargs.itr", :getproperty, force=true)
+    s == :data && depwarn("use values(kwargs) instead of kwargs.data", :getproperty, force=true)
+    s == :itr && depwarn("use keys(kwargs) instead of kwargs.itr", :getproperty, force=true)
     return getfield(x, s)
 end
 
@@ -506,3 +527,9 @@ end
 @deprecate invpermute!!(a, p::AbstractVector{<:Integer}) invpermute!(a, p) false
 
 # END 1.11 deprecations
+
+# BEGIN 1.12 deprecations
+
+@deprecate stat(fd::Integer) stat(RawFD(fd))
+
+# END 1.12 deprecations
diff --git a/base/div.jl b/base/div.jl
index 8988f2b70f27b..3fec8d2f5cdf3 100644
--- a/base/div.jl
+++ b/base/div.jl
@@ -43,6 +43,21 @@ julia> div(4, 3, RoundFromZero)
 julia> div(-4, 3, RoundFromZero)
 -2
 ```
+Because `div(x, y)` implements strictly correct truncated rounding based on the true
+value of floating-point numbers, unintuitive situations can arise. For example:
+```jldoctest
+julia> div(6.0, 0.1)
+59.0
+julia> 6.0 / 0.1
+60.0
+julia> 6.0 / big(0.1)
+59.99999999999999666933092612453056361837965690217069245739573412231113406246995
+```
+What is happening here is that the true value of the floating-point number written
+as `0.1` is slightly larger than the numerical value 1/10 while `6.0` represents
+the number 6 precisely. Therefore the true value of `6.0 / 0.1` is slightly less
+than 60. When doing division, this is rounded to precisely `60.0`, but
+`div(6.0, 0.1, RoundToZero)` always truncates the true value, so the result is `59.0`.
 """
 div(x, y, r::RoundingMode)
 
diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl
index b7a17f2e3ee70..1a2403bbb8644 100644
--- a/base/docs/Docs.jl
+++ b/base/docs/Docs.jl
@@ -577,6 +577,10 @@ function _doc(binding::Binding, sig::Type = Union{})
             for msig in multidoc.order
                 sig <: msig && return multidoc.docs[msig]
             end
+            # if no matching signatures, return first
+            if !isempty(multidoc.docs)
+                return first(values(multidoc.docs))
+            end
         end
     end
     return nothing
@@ -610,9 +614,8 @@ function docm(source::LineNumberNode, mod::Module, ex)
     @nospecialize ex
     if isexpr(ex, :->) && length(ex.args) > 1
         return docm(source, mod, ex.args...)
-    elseif isassigned(Base.REPL_MODULE_REF)
+    elseif (REPL = Base.REPL_MODULE_REF[]) !== Base
         # TODO: this is a shim to continue to allow `@doc` for looking up docstrings
-        REPL = Base.REPL_MODULE_REF[]
         return invokelatest(REPL.lookup_doc, ex)
     else
         return simple_lookup_doc(ex)
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index 19bafbe3de3a4..a142ecffdb732 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -663,8 +663,11 @@ kw"{", kw"{}", kw"}"
 """
     []
 
-Square braces are used for [indexing](@ref man-array-indexing), [indexed assignment](@ref man-indexed-assignment),
-[array literals](@ref man-array-literals), and [array comprehensions](@ref man-comprehensions).
+Square brackets are used for [indexing](@ref man-array-indexing) ([`getindex`](@ref)),
+[indexed assignment](@ref man-indexed-assignment) ([`setindex!`](@ref)),
+[array literals](@ref man-array-literals) ([`Base.vect`](@ref)),
+[array concatenation](@ref man-array-concatenation) ([`vcat`](@ref), [`hcat`](@ref), [`hvcat`](@ref), [`hvncat`](@ref)),
+and [array comprehensions](@ref man-comprehensions) ([`collect`](@ref)).
 """
 kw"[", kw"[]", kw"]"
 
@@ -934,11 +937,14 @@ expression, rather than the side effects that evaluating `b` or `c` may have.
 See the manual section on [control flow](@ref man-conditional-evaluation) for more details.
 
 # Examples
-```
+```jldoctest
 julia> x = 1; y = 2;
 
-julia> x > y ? println("x is larger") : println("y is larger")
-y is larger
+julia> x > y ? println("x is larger") : println("x is not larger")
+x is not larger
+
+julia> x > y ? "x is larger" : x == y ? "x and y are equal" : "y is larger"
+"y is larger"
 ```
 """
 kw"?", kw"?:"
@@ -1050,6 +1056,17 @@ exception object to the given variable within the `catch` block.
 The power of the `try`/`catch` construct lies in the ability to unwind a deeply
 nested computation immediately to a much higher level in the stack of calling functions.
 
+A `try/catch` block can also have an `else` clause that executes only if no exception occurred:
+```julia
+try
+    a_dangerous_operation()
+catch
+    @warn "The operation failed."
+else
+    @info "The operation succeeded."
+end
+```
+
 A `try` or `try`/`catch` block can also have a [`finally`](@ref) clause that executes
 at the end, regardless of whether an exception occurred.  For example, this can be
 used to guarantee that an opened file is closed:
@@ -1064,6 +1081,9 @@ finally
 end
 ```
 (`finally` can also be used without a `catch` block.)
+
+!!! compat "Julia 1.8"
+    Else clauses require at least Julia 1.8.
 """
 kw"try", kw"catch"
 
@@ -1372,7 +1392,11 @@ Usually `begin` will not be necessary, since keywords such as [`function`](@ref)
 implicitly begin blocks of code. See also [`;`](@ref).
 
 `begin` may also be used when indexing to represent the first index of a
-collection or the first index of a dimension of an array.
+collection or the first index of a dimension of an array. For example,
+`a[begin]` is the first element of an array `a`.
+
+!!! compat "Julia 1.4"
+    Use of `begin` as an index requires Julia 1.4 or later.
 
 # Examples
 ```jldoctest
@@ -1433,8 +1457,20 @@ kw"struct"
     mutable struct
 
 `mutable struct` is similar to [`struct`](@ref), but additionally allows the
-fields of the type to be set after construction. See the manual section on
-[Composite Types](@ref) for more information.
+fields of the type to be set after construction.
+
+Individual fields of a mutable struct can be marked as `const` to make them immutable:
+
+```julia
+mutable struct Baz
+    a::Int
+    const b::Float64
+end
+```
+!!! compat "Julia 1.8"
+    The `const` keyword for fields of mutable structs requires at least Julia 1.8.
+
+See the manual section on [Composite Types](@ref) for more information.
 """
 kw"mutable struct"
 
@@ -1661,7 +1697,7 @@ julia> ab = AB(1, 3)
 AB(1.0f0, 3.0)
 
 julia> ab.c # field `c` doesn't exist
-ERROR: FieldError: type AB has no field c
+ERROR: FieldError: type AB has no field `c`, available fields: `a`, `b`
 Stacktrace:
 [...]
 ```
@@ -1805,12 +1841,15 @@ Stacktrace:
 DomainError
 
 """
-    Task(func)
+    Task(func[, reserved_stack::Int])
 
 Create a `Task` (i.e. coroutine) to execute the given function `func` (which
 must be callable with no arguments). The task exits when this function returns.
 The task will run in the "world age" from the parent at construction when [`schedule`](@ref)d.
 
+The optional `reserved_stack` argument specifies the size of the stack available
+for this task, in bytes. The default, `0`, uses the system-dependent stack size default.
+
 !!! warning
     By default tasks will have the sticky bit set to true `t.sticky`. This models the
     historic default for [`@async`](@ref). Sticky tasks can only be run on the worker thread
@@ -3152,14 +3191,27 @@ Any
 """
     Union{}
 
-`Union{}`, the empty [`Union`](@ref) of types, is the type that has no values. That is, it has the defining
-property `isa(x, Union{}) == false` for any `x`. `Base.Bottom` is defined as its alias and the type of `Union{}`
-is `Core.TypeofBottom`.
+`Union{}`, the empty [`Union`](@ref) of types, is the *bottom* type of the type system. That is, for each
+`T::Type`, `Union{} <: T`. Also see the subtyping operator's documentation: [`<:`](@ref).
+
+As such, `Union{}` is also an *empty*/*uninhabited* type, meaning that it has no values. That is, for each `x`,
+`isa(x, Union{}) == false`.
+
+`Base.Bottom` is defined as its alias and the type of `Union{}` is `Core.TypeofBottom`.
 
 # Examples
 ```jldoctest
 julia> isa(nothing, Union{})
 false
+
+julia> Union{} <: Int
+true
+
+julia> typeof(Union{}) === Core.TypeofBottom
+true
+
+julia> isa(Union{}, Union)
+false
 ```
 """
 kw"Union{}", Base.Bottom
@@ -3663,6 +3715,9 @@ unused and delete the entire benchmark code).
     which the value of the arguments of this intrinsic were available (in a register,
     in memory, etc.).
 
+!!! compat "Julia 1.8"
+    This method was added in Julia 1.8.
+
 # Examples
 
 ```julia
diff --git a/base/error.jl b/base/error.jl
index d169cdc8085ac..c49ede624607d 100644
--- a/base/error.jl
+++ b/base/error.jl
@@ -232,12 +232,14 @@ macro assert(ex, msgs...)
         msg = msg # pass-through
     elseif !isempty(msgs) && (isa(msg, Expr) || isa(msg, Symbol))
         # message is an expression needing evaluating
-        msg = :(Main.Base.string($(esc(msg))))
+        # N.B. To reduce the risk of invalidation caused by the complex callstack involved
+        # with `string`, use `inferencebarrier` here to hide this `string` from the compiler.
+        msg = :(Main.Base.inferencebarrier(Main.Base.string)($(esc(msg))))
     elseif isdefined(Main, :Base) && isdefined(Main.Base, :string) && applicable(Main.Base.string, msg)
         msg = Main.Base.string(msg)
     else
         # string() might not be defined during bootstrap
-        msg = :(_assert_tostring($(Expr(:quote,msg))))
+        msg = :(Main.Base.inferencebarrier(_assert_tostring)($(Expr(:quote,msg))))
     end
     return :($(esc(ex)) ? $(nothing) : throw(AssertionError($msg)))
 end
diff --git a/base/errorshow.jl b/base/errorshow.jl
index a75f2f54b156d..20bdee1de6ec0 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -43,6 +43,15 @@ function showerror(io::IO, ex::Meta.ParseError)
     end
 end
 
+function showerror(io::IO, ex::Core.TypeNameError)
+    print(io, "TypeNameError: ")
+    if isa(ex.a, Union)
+        print(io, "typename does not apply to unions whose components have different typenames")
+    else
+        print(io, "typename does not apply to this type")
+    end
+end
+
 function showerror(io::IO, ex::BoundsError)
     print(io, "BoundsError")
     if isdefined(ex, :a)
@@ -196,7 +205,7 @@ function showerror(io::IO, ex::CanonicalIndexError)
     print(io, "CanonicalIndexError: ", ex.func, " not defined for ", ex.type)
 end
 
-typesof(@nospecialize args...) = Tuple{Any[ Core.Typeof(args[i]) for i in 1:length(args) ]...}
+typesof(@nospecialize args...) = Tuple{Any[Core.Typeof(arg) for arg in args]...}
 
 function print_with_compare(io::IO, @nospecialize(a::DataType), @nospecialize(b::DataType), color::Symbol)
     if a.name === b.name
@@ -273,7 +282,7 @@ function showerror(io::IO, ex::MethodError)
         arg_types_param = arg_types_param[3:end]
         san_arg_types_param = san_arg_types_param[3:end]
         keys = kwt.parameters[1]::Tuple
-        kwargs = Any[(keys[i], fieldtype(kwt, i)) for i in 1:length(keys)]
+        kwargs = Any[(keys[i], fieldtype(kwt, i)) for i in eachindex(keys)]
         arg_types = rewrap_unionall(Tuple{arg_types_param...}, arg_types)
     end
     if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types
@@ -369,7 +378,7 @@ end
 
 function showerror(io::IO, exc::FieldError)
     @nospecialize
-    print(io, "FieldError: type $(exc.type |> nameof) has no field $(exc.field)")
+    print(io, "FieldError: type $(exc.type |> nameof) has no field `$(exc.field)`")
     Base.Experimental.show_error_hints(io, exc)
 end
 
@@ -694,7 +703,7 @@ function show_reduced_backtrace(io::IO, t::Vector)
 
     push!(repeated_cycle, (0,0,0)) # repeated_cycle is never empty
     frame_counter = 1
-    for i in 1:length(displayed_stackframes)
+    for i in eachindex(displayed_stackframes)
         (frame, n) = displayed_stackframes[i]
 
         print_stackframe(io, frame_counter, frame, n, ndigits_max, STACKTRACE_FIXEDCOLORS, STACKTRACE_MODULECOLORS)
@@ -871,7 +880,7 @@ end
 function _collapse_repeated_frames(trace)
     kept_frames = trues(length(trace))
     last_frame = nothing
-    for i in 1:length(trace)
+    for i in eachindex(trace)
         frame::StackFrame, _ = trace[i]
         if last_frame !== nothing && frame.file == last_frame.file && frame.line == last_frame.line
             #=
@@ -916,7 +925,7 @@ function _collapse_repeated_frames(trace)
                 end
                 if length(last_params) > length(params)
                     issame = true
-                    for i = 1:length(params)
+                    for i = eachindex(params)
                         issame &= params[i] == last_params[i]
                     end
                     if issame
@@ -1043,7 +1052,7 @@ function nonsetable_type_hint_handler(io, ex, arg_types, kwargs)
             print(io, "\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ")
             printstyled(io, "a[1, 2]", color=:cyan)
             print(io, " rather than a[1][2]")
-        else isType(T)
+        elseif isType(T)
             Tx = T.parameters[1]
             print(io, "\nYou attempted to index the type $Tx, rather than an instance of the type. Make sure you create the type using its constructor: ")
             printstyled(io, "d = $Tx([...])", color=:cyan)
@@ -1058,7 +1067,7 @@ Experimental.register_error_hint(nonsetable_type_hint_handler, MethodError)
 # (probably attempting concatenation)
 function string_concatenation_hint_handler(io, ex, arg_types, kwargs)
     @nospecialize
-    if (ex.f === +) && all(i -> i <: AbstractString, arg_types)
+    if (ex.f === +) && !isempty(arg_types) && all(i -> i <: AbstractString, arg_types)
         print(io, "\nString concatenation is performed with ")
         printstyled(io, "*", color=:cyan)
         print(io, " (See also: https://docs.julialang.org/en/v1/manual/strings/#man-concatenation).")
@@ -1093,7 +1102,7 @@ end
 Experimental.register_error_hint(methods_on_iterable, MethodError)
 
 # Display a hint in case the user tries to access non-member fields of container type datastructures
-function fielderror_hint_handler(io, exc)
+function fielderror_dict_hint_handler(io, exc)
     @nospecialize
     field = exc.field
     type = exc.type
@@ -1104,7 +1113,32 @@ function fielderror_hint_handler(io, exc)
     end
 end
 
-Experimental.register_error_hint(fielderror_hint_handler, FieldError)
+Experimental.register_error_hint(fielderror_dict_hint_handler, FieldError)
+
+function fielderror_listfields_hint_handler(io, exc)
+    fields = fieldnames(exc.type)
+    if isempty(fields)
+        print(io, "; $(nameof(exc.type)) has no fields at all.")
+    else
+        print(io, ", available fields: $(join(map(k -> "`$k`", fields), ", "))")
+    end
+    props = _propertynames_bytype(exc.type)
+    isnothing(props) && return
+    props = setdiff(props, fields)
+    isempty(props) && return
+    print(io, "\nAvailable properties: $(join(map(k -> "`$k`", props), ", "))")
+end
+
+function _propertynames_bytype(T::Type)
+    which(propertynames, (T,)) === which(propertynames, (Any,)) && return nothing
+    inferred_names = promote_op(Val∘propertynames, T)
+    inferred_names isa DataType && inferred_names <: Val || return nothing
+    inferred_names = inferred_names.parameters[1]
+    inferred_names isa NTuple{<:Any, Symbol} || return nothing
+    return Symbol[inferred_names[i] for i in 1:length(inferred_names)]
+end
+
+Experimental.register_error_hint(fielderror_listfields_hint_handler, FieldError)
 
 # ExceptionStack implementation
 size(s::ExceptionStack) = size(s.stack)
diff --git a/base/essentials.jl b/base/essentials.jl
index c4ce6dfbd4e3a..0e7be924c908c 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -202,7 +202,8 @@ macro _total_meta()
         #=:inaccessiblememonly=#true,
         #=:noub=#true,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
 end
 # can be used in place of `@assume_effects :foldable` (supposed to be used for bootstrapping)
 macro _foldable_meta()
@@ -216,7 +217,8 @@ macro _foldable_meta()
         #=:inaccessiblememonly=#true,
         #=:noub=#true,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#true))
 end
 # can be used in place of `@assume_effects :terminates_locally` (supposed to be used for bootstrapping)
 macro _terminates_locally_meta()
@@ -230,7 +232,8 @@ macro _terminates_locally_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :terminates_globally` (supposed to be used for bootstrapping)
 macro _terminates_globally_meta()
@@ -244,7 +247,8 @@ macro _terminates_globally_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :terminates_globally :notaskstate` (supposed to be used for bootstrapping)
 macro _terminates_globally_notaskstate_meta()
@@ -258,7 +262,8 @@ macro _terminates_globally_notaskstate_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :terminates_globally :noub` (supposed to be used for bootstrapping)
 macro _terminates_globally_noub_meta()
@@ -272,7 +277,8 @@ macro _terminates_globally_noub_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#true,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :effect_free :terminates_locally` (supposed to be used for bootstrapping)
 macro _effect_free_terminates_locally_meta()
@@ -286,7 +292,8 @@ macro _effect_free_terminates_locally_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :nothrow :noub` (supposed to be used for bootstrapping)
 macro _nothrow_noub_meta()
@@ -300,7 +307,8 @@ macro _nothrow_noub_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#true,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping)
 macro _nothrow_meta()
@@ -314,7 +322,8 @@ macro _nothrow_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :nothrow` (supposed to be used for bootstrapping)
 macro _noub_meta()
@@ -328,7 +337,8 @@ macro _noub_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#true,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :notaskstate` (supposed to be used for bootstrapping)
 macro _notaskstate_meta()
@@ -342,7 +352,8 @@ macro _notaskstate_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
         #=:noub_if_noinbounds=#false,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 # can be used in place of `@assume_effects :noub_if_noinbounds` (supposed to be used for bootstrapping)
 macro _noub_if_noinbounds_meta()
@@ -356,7 +367,8 @@ macro _noub_if_noinbounds_meta()
         #=:inaccessiblememonly=#false,
         #=:noub=#false,
         #=:noub_if_noinbounds=#true,
-        #=:consistent_overlay=#false))
+        #=:consistent_overlay=#false,
+        #=:nortcall=#false))
 end
 
 # another version of inlining that propagates an inbounds context
@@ -389,7 +401,7 @@ julia> nameof(Base.Broadcast)
 """
 nameof(m::Module) = (@_total_meta; ccall(:jl_module_name, Ref{Symbol}, (Any,), m))
 
-function iterate end
+typeof(function iterate end).name.constprop_heuristic = Core.ITERATE_HEURISTIC
 
 """
     convert(T, x)
@@ -575,15 +587,7 @@ function unconstrain_vararg_length(va::Core.TypeofVararg)
     return Vararg{unwrapva(va)}
 end
 
-typename(a) = error("typename does not apply to this type")
-typename(a::DataType) = a.name
-function typename(a::Union)
-    ta = typename(a.a)
-    tb = typename(a.b)
-    ta === tb || error("typename does not apply to unions whose components have different typenames")
-    return tb
-end
-typename(union::UnionAll) = typename(union.body)
+import Core: typename
 
 _tuple_error(T::Type, x) = (@noinline; throw(MethodError(convert, (T, x))))
 
@@ -1245,3 +1249,16 @@ that is whether it has an `iterate` method or not.
 function isiterable(T)::Bool
     return hasmethod(iterate, Tuple{T})
 end
+
+# Special constprop heuristics for various binary opes
+typename(typeof(function + end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function - end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function * end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function == end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function != end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function <= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function >= end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function < end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function > end)).constprop_heuristic  = Core.SAMETYPE_HEURISTIC
+typename(typeof(function << end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
+typename(typeof(function >> end)).constprop_heuristic = Core.SAMETYPE_HEURISTIC
diff --git a/base/experimental.jl b/base/experimental.jl
index 58c7258120f3f..648b5da0ed9a1 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -319,9 +319,9 @@ function show_error_hints(io, ex, args...)
     for handler in hinters
         try
             @invokelatest handler(io, ex, args...)
-        catch err
+        catch
             tn = typeof(handler).name
-            @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error"
+            @error "Hint-handler $handler for $(typeof(ex)) in $(tn.module) caused an error" exception=current_exceptions()
         end
     end
 end
@@ -457,4 +457,18 @@ without adding them to the global method table.
 """
 :@MethodTable
 
+"""
+    Base.Experimental.entrypoint(f, argtypes::Tuple)
+
+Mark a method for inclusion when the `--trim` option is specified.
+"""
+function entrypoint(@nospecialize(f), @nospecialize(argtypes::Tuple))
+    entrypoint(Tuple{Core.Typeof(f), argtypes...})
+end
+
+function entrypoint(@nospecialize(argt::Type))
+    ccall(:jl_add_entrypoint, Int32, (Any,), argt)
+    nothing
+end
+
 end
diff --git a/base/exports.jl b/base/exports.jl
index 5564cdbe9bff2..daba9a010a9e6 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -58,6 +58,7 @@ export
     IOBuffer,
     IOStream,
     LinRange,
+    Lockable,
     Irrational,
     LazyString,
     Matrix,
@@ -406,6 +407,7 @@ export
     indexin,
     argmax,
     argmin,
+    insertdims,
     invperm,
     invpermute!,
     isassigned,
@@ -594,6 +596,7 @@ export
     codepoint,
     codeunit,
     codeunits,
+    ctruncate,
     digits,
     digits!,
     eachsplit,
@@ -618,6 +621,7 @@ export
     join,
     lpad,
     lstrip,
+    ltruncate,
     ncodeunits,
     ndigits,
     nextind,
@@ -630,6 +634,7 @@ export
     rpad,
     rsplit,
     rstrip,
+    rtruncate,
     split,
     string,
     strip,
diff --git a/base/expr.jl b/base/expr.jl
index a58ae62c21a5f..478ccd7d7cc20 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -65,7 +65,7 @@ function copy_exprs(@nospecialize(x))
     end
     return x
 end
-copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in 1:length(x)]
+copy_exprargs(x::Array{Any,1}) = Any[copy_exprs(@inbounds x[i]) for i in eachindex(x)]
 
 @eval exprarray(head::Symbol, arg::Array{Any,1}) = $(Expr(:new, :Expr, :head, :arg))
 
@@ -505,6 +505,7 @@ The following `setting`s are supported.
 - `:inaccessiblememonly`
 - `:noub`
 - `:noub_if_noinbounds`
+- `:nortcall`
 - `:foldable`
 - `:removable`
 - `:total`
@@ -673,6 +674,20 @@ The `:noub` setting asserts that the method will not execute any undefined behav
 any other effect assertions (such as `:consistent` or `:effect_free`) as well, but we do
 not model this, and they assume the absence of undefined behavior.
 
+---
+## `:nortcall`
+
+The `:nortcall` setting asserts that the method does not call `Core.Compiler.return_type`,
+and that any other methods this method might call also do not call `Core.Compiler.return_type`.
+
+!!! note
+    To be precise, this assertion can be used when a call to `Core.Compiler.return_type` is
+    not made at runtime; that is, when the result of `Core.Compiler.return_type` is known
+    exactly at compile time and the call is eliminated by the optimizer. However, since
+    whether the result of `Core.Compiler.return_type` is folded at compile time depends
+    heavily on the compiler's implementation, it is generally risky to assert this if
+    the method in question uses `Core.Compiler.return_type` in any form.
+
 ---
 ## `:foldable`
 
@@ -683,6 +698,7 @@ currently equivalent to the following `setting`s:
 - `:effect_free`
 - `:terminates_globally`
 - `:noub`
+- `:nortcall`
 
 !!! note
     This list in particular does not include `:nothrow`. The compiler will still
@@ -716,6 +732,7 @@ the following other `setting`s:
 - `:notaskstate`
 - `:inaccessiblememonly`
 - `:noub`
+- `:nortcall`
 
 !!! warning
     `:total` is a very strong assertion and will likely gain additional semantics
@@ -794,17 +811,17 @@ function compute_assumed_setting(override::EffectsOverride, @nospecialize(settin
     elseif setting === :noub_if_noinbounds
         return EffectsOverride(override; noub_if_noinbounds = val)
     elseif setting === :foldable
-        consistent = effect_free = terminates_globally = noub = val
-        return EffectsOverride(override; consistent, effect_free, terminates_globally, noub)
+        consistent = effect_free = terminates_globally = noub = nortcall = val
+        return EffectsOverride(override; consistent, effect_free, terminates_globally, noub, nortcall)
     elseif setting === :removable
         effect_free = nothrow = terminates_globally = val
         return EffectsOverride(override; effect_free, nothrow, terminates_globally)
     elseif setting === :total
         consistent = effect_free = nothrow = terminates_globally = notaskstate =
-            inaccessiblememonly = noub = val
+            inaccessiblememonly = noub = nortcall = val
         return EffectsOverride(override;
             consistent, effect_free, nothrow, terminates_globally, notaskstate,
-            inaccessiblememonly, noub)
+            inaccessiblememonly, noub, nortcall)
     end
     return nothing
 end
@@ -850,6 +867,9 @@ while it can not infer the concrete return type of it.
 Without the `@nospecializeinfer`, `f([1.0])` would infer the return type of `g` as `Float64`,
 indicating that inference ran for `g(::Vector{Float64})` despite the prohibition on
 specialized code generation.
+
+!!! compat "Julia 1.10"
+    Using `Base.@nospecializeinfer` requires Julia version 1.10.
 """
 macro nospecializeinfer(ex)
     esc(isa(ex, Expr) ? pushmeta!(ex, :nospecializeinfer) : ex)
@@ -882,8 +902,8 @@ end
 unwrap_macrocalls(@nospecialize(x)) = x
 function unwrap_macrocalls(ex::Expr)
     inner = ex
-    while inner.head === :macrocall
-        inner = inner.args[end]::Expr
+    while isexpr(inner, :macrocall)
+        inner = inner.args[end]
     end
     return inner
 end
diff --git a/base/file.jl b/base/file.jl
index 727b97abd36f1..567783c4b1e5b 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -230,15 +230,19 @@ julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directo
 julia> isdir("intermediate_dir/actually_a_directory.txt")
 true
 
+julia> mkpath("my/test/dir/") # returns the original `path`
+"my/test/dir/"
 ```
 """
 function mkpath(path::AbstractString; mode::Integer = 0o777)
-    isdirpath(path) && (path = dirname(path))
-    dir = dirname(path)
-    (path == dir || isdir(path)) && return path
-    mkpath(dir, mode = checkmode(mode))
+    parent = dirname(path)
+    # stop recursion for `""`, `"/"`, or existing dir
+    (path == parent || isdir(path)) && return path
+    mkpath(parent, mode = checkmode(mode))
     try
-        mkdir(path, mode = mode)
+        # The `isdir` check could be omitted, then `mkdir` will throw an error in cases like `x/`.
+        # Although the error will not be rethrown, we avoid it in advance for performance reasons.
+        isdir(path) || mkdir(path, mode = mode)
     catch err
         # If there is a problem with making the directory, but the directory
         # does in fact exist, then ignore the error. Else re-throw it.
@@ -246,7 +250,7 @@ function mkpath(path::AbstractString; mode::Integer = 0o777)
             rethrow()
         end
     end
-    path
+    return path
 end
 
 # Files that were requested to be deleted but can't be by the current process
@@ -381,7 +385,7 @@ of the file or directory `src` refers to.
 Return `dst`.
 
 !!! note
-    The `cp` function is different from the `cp` command. The `cp` function always operates on
+    The `cp` function is different from the `cp` Unix command. The `cp` function always operates on
     the assumption that `dst` is a file, while the command does different things depending
     on whether `dst` is a directory or a file.
     Using `force=true` when `dst` is a directory will result in loss of all the contents present
@@ -434,13 +438,73 @@ julia> mv("hello.txt", "goodbye.txt", force=true)
 julia> rm("goodbye.txt");
 
 ```
+
+!!! note
+    The `mv` function is different from the `mv` Unix command. The `mv` function by
+    default will error if `dst` exists, while the command will delete
+    an existing `dst` file by default.
+    Also the `mv` function always operates on
+    the assumption that `dst` is a file, while the command does different things depending
+    on whether `dst` is a directory or a file.
+    Using `force=true` when `dst` is a directory will result in loss of all the contents present
+    in the `dst` directory, and `dst` will become a file that has the contents of `src` instead.
 """
 function mv(src::AbstractString, dst::AbstractString; force::Bool=false)
-    checkfor_mv_cp_cptree(src, dst, "moving"; force=force)
-    rename(src, dst)
+    if force
+        _mv_replace(src, dst)
+    else
+        _mv_noreplace(src, dst)
+    end
+end
+
+function _mv_replace(src::AbstractString, dst::AbstractString)
+    # This check is copied from checkfor_mv_cp_cptree
+    if ispath(dst) && Base.samefile(src, dst)
+        abs_src = islink(src) ? abspath(readlink(src)) : abspath(src)
+        abs_dst = islink(dst) ? abspath(readlink(dst)) : abspath(dst)
+        throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir. ",
+                                   "This is not supported.\n  ",
+                                   "`src` refers to: $(abs_src)\n  ",
+                                   "`dst` refers to: $(abs_dst)\n")))
+    end
+    # First try to do a regular rename, because this might avoid a situation
+    # where dst is deleted or truncated.
+    try
+        rename(src, dst)
+    catch err
+        err isa IOError || rethrow()
+        err.code==Base.UV_ENOENT && rethrow()
+        # on rename error try to delete dst if it exists and isn't the same as src
+        checkfor_mv_cp_cptree(src, dst, "moving"; force=true)
+        try
+            rename(src, dst)
+        catch err
+            err isa IOError || rethrow()
+            # on second error, default to force cp && rm
+            cp(src, dst; force=true, follow_symlinks=false)
+            rm(src; recursive=true)
+        end
+    end
+    dst
+end
+
+function _mv_noreplace(src::AbstractString, dst::AbstractString)
+    # Error if dst exists.
+    # This check currently has TOCTTOU issues.
+    checkfor_mv_cp_cptree(src, dst, "moving"; force=false)
+    try
+        rename(src, dst)
+    catch err
+        err isa IOError || rethrow()
+        err.code==Base.UV_ENOENT && rethrow()
+        # on error, default to cp && rm
+        cp(src, dst; force=false, follow_symlinks=false)
+        rm(src; recursive=true)
+    end
     dst
 end
 
+
 """
     touch(path::AbstractString)
     touch(fd::File)
@@ -1039,24 +1103,30 @@ end
     walkdir(dir; topdown=true, follow_symlinks=false, onerror=throw)
 
 Return an iterator that walks the directory tree of a directory.
-The iterator returns a tuple containing `(rootpath, dirs, files)`.
+
+The iterator returns a tuple containing `(path, dirs, files)`.
+Each iteration `path` will change to the next directory in the tree;
+then `dirs` and `files` will be vectors containing the directories and files
+in the current `path` directory.
 The directory tree can be traversed top-down or bottom-up.
 If `walkdir` or `stat` encounters a `IOError` it will rethrow the error by default.
 A custom error handling function can be provided through `onerror` keyword argument.
 `onerror` is called with a `IOError` as argument.
+The returned iterator is stateful so when accessed repeatedly each access will
+resume where the last left off, like [`Iterators.Stateful`](@ref).
 
 See also: [`readdir`](@ref).
 
 # Examples
 ```julia
-for (root, dirs, files) in walkdir(".")
-    println("Directories in \$root")
+for (path, dirs, files) in walkdir(".")
+    println("Directories in \$path")
     for dir in dirs
-        println(joinpath(root, dir)) # path to directories
+        println(joinpath(path, dir)) # path to directories
     end
-    println("Files in \$root")
+    println("Files in \$path")
     for file in files
-        println(joinpath(root, file)) # path to files
+        println(joinpath(path, file)) # path to files
     end
 end
 ```
@@ -1066,18 +1136,18 @@ julia> mkpath("my/test/dir");
 
 julia> itr = walkdir("my");
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my", ["test"], String[])
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my/test", ["dir"], String[])
 
-julia> (root, dirs, files) = first(itr)
+julia> (path, dirs, files) = first(itr)
 ("my/test/dir", String[], String[])
 ```
 """
-function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
-    function _walkdir(chnl, root)
+function walkdir(path; topdown=true, follow_symlinks=false, onerror=throw)
+    function _walkdir(chnl, path)
         tryf(f, p) = try
                 f(p)
             catch err
@@ -1089,7 +1159,7 @@ function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
                 end
                 return
             end
-        entries = tryf(_readdirx, root)
+        entries = tryf(_readdirx, path)
         entries === nothing && return
         dirs = Vector{String}()
         files = Vector{String}()
@@ -1103,17 +1173,17 @@ function walkdir(root; topdown=true, follow_symlinks=false, onerror=throw)
         end
 
         if topdown
-            push!(chnl, (root, dirs, files))
+            push!(chnl, (path, dirs, files))
         end
         for dir in dirs
-            _walkdir(chnl, joinpath(root, dir))
+            _walkdir(chnl, joinpath(path, dir))
         end
         if !topdown
-            push!(chnl, (root, dirs, files))
+            push!(chnl, (path, dirs, files))
         end
         nothing
     end
-    return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, root))
+    return Channel{Tuple{String,Vector{String},Vector{String}}}(chnl -> _walkdir(chnl, path))
 end
 
 function unlink(p::AbstractString)
@@ -1122,15 +1192,38 @@ function unlink(p::AbstractString)
     nothing
 end
 
-# For move command
-function rename(src::AbstractString, dst::AbstractString; force::Bool=false)
-    err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), src, dst)
-    # on error, default to cp && rm
+"""
+    Base.rename(oldpath::AbstractString, newpath::AbstractString)
+
+Change the name of a file or directory from `oldpath` to `newpath`.
+If `newpath` is an existing file or empty directory it may be replaced.
+Equivalent to [rename(2)](https://man7.org/linux/man-pages/man2/rename.2.html) on Unix.
+If a path contains a "\\0" throw an `ArgumentError`.
+On other failures throw an `IOError`.
+Return `newpath`.
+
+This is a lower level filesystem operation used to implement [`mv`](@ref).
+
+OS-specific restrictions may apply when `oldpath` and `newpath` are in different directories.
+
+Currently there are a few differences in behavior on Windows which may be resolved in a future release.
+Specifically, currently on Windows:
+1. `rename` will fail if `oldpath` or `newpath` are opened files.
+2. `rename` will fail if `newpath` is an existing directory.
+3. `rename` may work if `newpath` is a file and `oldpath` is a directory.
+4. `rename` may remove `oldpath` if it is a hardlink to `newpath`.
+
+See also: [`mv`](@ref).
+
+!!! compat "Julia 1.12"
+    This method was made public in Julia 1.12.
+"""
+function rename(oldpath::AbstractString, newpath::AbstractString)
+    err = ccall(:jl_fs_rename, Int32, (Cstring, Cstring), oldpath, newpath)
     if err < 0
-        cp(src, dst; force=force, follow_symlinks=false)
-        rm(src; recursive=true)
+        uv_error("rename($(repr(oldpath)), $(repr(newpath)))", err)
     end
-    nothing
+    newpath
 end
 
 function sendfile(src::AbstractString, dst::AbstractString)
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index a2a0f60bcf399..2c26f7cff1133 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -42,7 +42,7 @@ it is the minimum of `maxintfloat(T)` and [`typemax(S)`](@ref).
 maxintfloat(::Type{S}, ::Type{T}) where {S<:AbstractFloat, T<:Integer} = min(maxintfloat(S), S(typemax(T)))
 maxintfloat() = maxintfloat(Float64)
 
-isinteger(x::AbstractFloat) = (x - trunc(x) == 0)
+isinteger(x::AbstractFloat) = iszero(x - trunc(x)) # note: x == trunc(x) would be incorrect for x=Inf
 
 # See rounding.jl for docstring.
 
@@ -232,7 +232,9 @@ function isapprox(x::Integer, y::Integer;
     if norm === abs && atol < 1 && rtol == 0
         return x == y
     else
-        return norm(x - y) <= max(atol, rtol*max(norm(x), norm(y)))
+        # We need to take the difference `max` - `min` when comparing unsigned integers.
+        _x, _y = x < y ? (x, y) : (y, x)
+        return norm(_y - _x) <= max(atol, rtol*max(norm(_x), norm(_y)))
     end
 end
 
diff --git a/base/gcutils.jl b/base/gcutils.jl
index 7aea7f222d7fd..84a184537ffc0 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -38,7 +38,7 @@ WeakRef
 # Used by `Base.finalizer` to validate mutability of an object being finalized.
 function _check_mutable(@nospecialize(o)) @noinline
     if !ismutable(o)
-        error("objects of type ", typeof(o), " cannot be finalized")
+        error("objects of type ", typeof(o), " cannot be finalized because they are not mutable")
     end
 end
 
diff --git a/base/genericmemory.jl b/base/genericmemory.jl
index 32c15a22e0db1..91b87ab14c6b1 100644
--- a/base/genericmemory.jl
+++ b/base/genericmemory.jl
@@ -71,6 +71,8 @@ size(a::GenericMemory) = (length(a),)
 
 IndexStyle(::Type{<:GenericMemory}) = IndexLinear()
 
+parent(ref::GenericMemoryRef) = ref.mem
+
 pointer(mem::GenericMemoryRef) = unsafe_convert(Ptr{Cvoid}, mem) # no bounds check, even for empty array
 
 _unsetindex!(A::Memory, i::Int) =  (@_propagate_inbounds_meta; _unsetindex!(memoryref(A, i)); A)
@@ -188,7 +190,7 @@ function fill!(a::Union{Memory{UInt8}, Memory{Int8}}, x::Integer)
     t = @_gc_preserve_begin a
     p = unsafe_convert(Ptr{Cvoid}, a)
     T = eltype(a)
-    memset(p, x isa T ? x : convert(T, x), length(a))
+    memset(p, x isa T ? x : convert(T, x), length(a) % UInt)
     @_gc_preserve_end t
     return a
 end
@@ -316,38 +318,15 @@ function indcopy(sz::Dims, I::GenericMemory)
     dst, src
 end
 
-# Wrapping a memory region in an Array
-@eval begin # @eval for the Array construction. Block for the docstring.
-    function reshape(m::GenericMemory{M, T}, dims::Vararg{Int, N}) where {M, T, N}
-        len = Core.checked_dims(dims...)
-        length(m) == len || throw(DimensionMismatch("parent has $(length(m)) elements, which is incompatible with size $(dims)"))
-        ref = memoryref(m)
-        $(Expr(:new, :(Array{T, N}), :ref, :dims))
-    end
-
-    """
-        view(m::GenericMemory{M, T}, inds::Union{UnitRange, OneTo})
-
-    Create a vector `v::Vector{T}` backed by the specified indices of `m`. It is only safe to
-    resize `v` if `m` is subseqently not used.
-    """
-    function view(m::GenericMemory{M, T}, inds::Union{UnitRange, OneTo}) where {M, T}
-        isempty(inds) && return T[] # needed to allow view(Memory{T}(undef, 0), 2:1)
-        @boundscheck checkbounds(m, inds)
-        ref = memoryref(m, first(inds)) # @inbounds would be safe here but does not help performance.
-        dims = (Int(length(inds)),)
-        $(Expr(:new, :(Array{T, 1}), :ref, :dims))
-    end
-end
-view(m::GenericMemory, inds::Colon) = view(m, eachindex(m))
-
 # get, set(once), modify, swap and replace at index, atomically
 function getindex_atomic(mem::GenericMemory, order::Symbol, i::Int)
+    @_propagate_inbounds_meta
     memref = memoryref(mem, i)
     return memoryrefget(memref, order, @_boundscheck)
 end
 
 function setindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+    @_propagate_inbounds_meta
     T = eltype(mem)
     memref = memoryref(mem, i)
     return memoryrefset!(
@@ -365,6 +344,7 @@ function setindexonce_atomic!(
     val,
     i::Int,
 )
+    @_propagate_inbounds_meta
     T = eltype(mem)
     memref = memoryref(mem, i)
     return Core.memoryrefsetonce!(
@@ -377,11 +357,13 @@ function setindexonce_atomic!(
 end
 
 function modifyindex_atomic!(mem::GenericMemory, order::Symbol, op, val, i::Int)
+    @_propagate_inbounds_meta
     memref = memoryref(mem, i)
     return Core.memoryrefmodify!(memref, op, val, order, @_boundscheck)
 end
 
 function swapindex_atomic!(mem::GenericMemory, order::Symbol, val, i::Int)
+    @_propagate_inbounds_meta
     T = eltype(mem)
     memref = memoryref(mem, i)
     return Core.memoryrefswap!(
@@ -400,6 +382,7 @@ function replaceindex_atomic!(
     desired,
     i::Int,
 )
+    @_propagate_inbounds_meta
     T = eltype(mem)
     memref = memoryref(mem, i)
     return Core.memoryrefreplace!(
diff --git a/base/iddict.jl b/base/iddict.jl
index 9c133d5ba23c6..f1632e93427a8 100644
--- a/base/iddict.jl
+++ b/base/iddict.jl
@@ -126,7 +126,7 @@ function empty!(d::IdDict)
     d.ht = Memory{Any}(undef, 32)
     ht = d.ht
     t = @_gc_preserve_begin ht
-    memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht))
+    memset(unsafe_convert(Ptr{Cvoid}, ht), 0, sizeof(ht) % UInt)
     @_gc_preserve_end t
     d.ndel = 0
     d.count = 0
diff --git a/base/initdefs.jl b/base/initdefs.jl
index aa2ea67528da9..707c96a2444d6 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -438,6 +438,11 @@ function atexit(f::Function)
 end
 
 function _atexit(exitcode::Cint)
+    # this current task shouldn't be scheduled anywhere, but if it was (because
+    # this exit came from a signal for example), then try to clear that state
+    # to minimize scheduler issues later
+    ct = current_task()
+    q = ct.queue; q === nothing || list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
     # Don't hold the lock around the iteration, just in case any other thread executing in
     # parallel tries to register a new atexit hook while this is running. We don't want to
     # block that thread from proceeding, and we can allow it to register its hook which we
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index 6df5b2c36124b..ec450aff2dff2 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -165,17 +165,24 @@ end
 
 # return (gcd(a, b), x, y) such that ax+by == gcd(a, b)
 """
-    gcdx(a, b)
+    gcdx(a, b...)
 
 Computes the greatest common (positive) divisor of `a` and `b` and their Bézout
 coefficients, i.e. the integer coefficients `u` and `v` that satisfy
-``ua+vb = d = gcd(a, b)``. ``gcdx(a, b)`` returns ``(d, u, v)``.
+``u*a + v*b = d = gcd(a, b)``. ``gcdx(a, b)`` returns ``(d, u, v)``.
+
+For more arguments than two, i.e., `gcdx(a, b, c, ...)` the Bézout coefficients are computed
+recursively, returning a solution `(d, u, v, w, ...)` to
+``u*a + v*b + w*c + ... = d = gcd(a, b, c, ...)``.
 
 The arguments may be integer and rational numbers.
 
 !!! compat "Julia 1.4"
     Rational arguments require Julia 1.4 or later.
 
+!!! compat "Julia 1.12"
+    More or fewer arguments than two require Julia 1.12 or later.
+
 # Examples
 ```jldoctest
 julia> gcdx(12, 42)
@@ -183,6 +190,9 @@ julia> gcdx(12, 42)
 
 julia> gcdx(240, 46)
 (2, -9, 47)
+
+julia> gcdx(15, 12, 20)
+(1, 7, -7, -1)
 ```
 
 !!! note
@@ -215,6 +225,18 @@ Base.@assume_effects :terminates_locally function gcdx(a::Integer, b::Integer)
 end
 gcdx(a::Real, b::Real) = gcdx(promote(a,b)...)
 gcdx(a::T, b::T) where T<:Real = throw(MethodError(gcdx, (a,b)))
+gcdx(a::Real) = (gcd(a), signbit(a) ? -one(a) : one(a))
+function gcdx(a::Real, b::Real, cs::Real...)
+    # a solution to the 3-arg `gcdx(a,b,c)` problem, `u*a + v*b + w*c = gcd(a,b,c)`, can be
+    # obtained from the 2-arg problem in three steps:
+    #   1. `gcdx(a,b)`: solve `i*a + j*b = d′ = gcd(a,b)` for `(i,j)`
+    #   2. `gcdx(d′,c)`: solve `x*gcd(a,b) + yc = gcd(gcd(a,b),c) = gcd(a,b,c)` for `(x,y)`
+    #   3. return `d = gcd(a,b,c)`, `u = i*x`, `v = j*x`, and `w = y`
+    # the N-arg solution proceeds similarly by recursion
+    d, i, j = gcdx(a, b)
+    d′, x, ys... = gcdx(d, cs...)
+    return d′, i*x, j*x, ys...
+end
 
 # multiplicative inverse of n mod m, error if none
 
@@ -263,14 +285,16 @@ end
     invmod(n::T) where {T <: Base.BitInteger}
 
 Compute the modular inverse of `n` in the integer ring of type `T`, i.e. modulo
-`2^N` where `N = 8*sizeof(T)` (e.g. `N = 32` for `Int32`). In other words these
+`2^N` where `N = 8*sizeof(T)` (e.g. `N = 32` for `Int32`). In other words, these
 methods satisfy the following identities:
 ```
 n * invmod(n) == 1
 (n * invmod(n, T)) % T == 1
 (n % T) * invmod(n, T) == 1
 ```
-Note that `*` here is modular multiplication in the integer ring, `T`.
+Note that `*` here is modular multiplication in the integer ring, `T`.  This will
+throw an error if `n` is even, because then it is not relatively prime with `2^N`
+and thus has no such inverse.
 
 Specifying the modulus implied by an integer type as an explicit value is often
 inconvenient since the modulus is by definition too big to be represented by the
@@ -296,7 +320,11 @@ function invmod(n::T) where {T<:BitInteger}
 end
 
 # ^ for any x supporting *
-to_power_type(x) = convert(Base._return_type(*, Tuple{typeof(x), typeof(x)}), x)
+function to_power_type(x::Number)
+    T = promote_type(typeof(x), typeof(one(x)), typeof(x*x))
+    convert(T, x)
+end
+to_power_type(x) = oftype(x*x, x)
 @noinline throw_domerr_powbysq(::Any, p) = throw(DomainError(p, LazyString(
     "Cannot raise an integer x to a negative power ", p, ".",
     "\nConvert input to float.")))
@@ -360,7 +388,7 @@ end
 
 # Restrict inlining to hardware-supported arithmetic types, which
 # are fast enough to benefit from inlining.
-const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float32,Float64}
+const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64}
 const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 
 # Inline x^2 and x^3 for Val
@@ -978,7 +1006,7 @@ end
 
 Return an array with element type `T` (default `Int`) of the digits of `n` in the given
 base, optionally padded with zeros to a specified size. More significant digits are at
-higher indices, such that `n == sum(digits[k]*base^(k-1) for k=1:length(digits))`.
+higher indices, such that `n == sum(digits[k]*base^(k-1) for k in 1:length(digits))`.
 
 See also [`ndigits`](@ref), [`digits!`](@ref),
 and for base 2 also [`bitstring`](@ref), [`count_ones`](@ref).
@@ -1237,3 +1265,102 @@ function binomial(x::Number, k::Integer)
     # and instead divide each term by i, to avoid spurious overflow.
     return prod(i -> (x-(i-1))/i, OneTo(k), init=oneunit(x)/one(k))
 end
+
+"""
+    clamp(x, lo, hi)
+
+Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
+are promoted to a common type.
+
+See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` as the first argument requires at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
+3-element Vector{BigFloat}:
+ 3.141592653589793238462643383279502884197169399375105820974944592307816406286198
+ 2.0
+ 9.0
+
+julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
+3-element Vector{Int64}:
+  6
+  6
+ 10
+```
+"""
+function clamp(x::X, lo::L, hi::H) where {X,L,H}
+    T = promote_type(X, L, H)
+    return (x > hi) ? convert(T, hi) : (x < lo) ? convert(T, lo) : convert(T, x)
+end
+
+"""
+    clamp(x, T)::T
+
+Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
+
+See also [`trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> clamp(200, Int8)
+127
+
+julia> clamp(-200, Int8)
+-128
+
+julia> trunc(Int, 4pi^2)
+39
+```
+"""
+function clamp(x, ::Type{T}) where {T<:Integer}
+    # delegating to clamp(x, typemin(T), typemax(T)) would promote types
+    # this way, we avoid unnecessary conversions
+    # think of, e.g., clamp(big(2) ^ 200, Int16)
+    lo = typemin(T)
+    hi = typemax(T)
+    return (x > hi) ? hi : (x < lo) ? lo : convert(T, x)
+end
+
+
+"""
+    clamp!(array::AbstractArray, lo, hi)
+
+Restrict values in `array` to the specified range, in-place.
+See also [`clamp`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` entries in `array` require at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> row = collect(-4:4)';
+
+julia> clamp!(row, 0, Inf)
+1×9 adjoint(::Vector{Int64}) with eltype Int64:
+ 0  0  0  0  0  1  2  3  4
+
+julia> clamp.((-4:4)', 0, Inf)
+1×9 Matrix{Float64}:
+ 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
+```
+"""
+function clamp!(x::AbstractArray, lo, hi)
+    @inbounds for i in eachindex(x)
+        x[i] = clamp(x[i], lo, hi)
+    end
+    x
+end
+
+"""
+    clamp(x::Integer, r::AbstractUnitRange)
+
+Clamp `x` to lie within range `r`.
+
+!!! compat "Julia 1.6"
+     This method requires at least Julia 1.6.
+"""
+clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r))
diff --git a/base/io.jl b/base/io.jl
index 0f1812942d23e..83a215d6359fc 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -543,8 +543,8 @@ julia> rm("my_file.txt")
 ```
 """
 readuntil(filename::AbstractString, delim; kw...) = open(io->readuntil(io, delim; kw...), convert(String, filename)::String)
-readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...))
-readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=70), stream, delim; kw...)))
+readuntil(stream::IO, delim::UInt8; kw...) = _unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...))
+readuntil(stream::IO, delim::Union{AbstractChar, AbstractString}; kw...) = String(_unsafe_take!(copyuntil(IOBuffer(sizehint=16), stream, delim; kw...)))
 readuntil(stream::IO, delim::T; keep::Bool=false) where T = _copyuntil(Vector{T}(), stream, delim, keep)
 
 
@@ -617,7 +617,7 @@ Logan
 readline(filename::AbstractString; keep::Bool=false) =
     open(io -> readline(io; keep), filename)
 readline(s::IO=stdin; keep::Bool=false) =
-    String(_unsafe_take!(copyline(IOBuffer(sizehint=70), s; keep)))
+    String(_unsafe_take!(copyline(IOBuffer(sizehint=16), s; keep)))
 
 """
     copyline(out::IO, io::IO=stdin; keep::Bool=false)
@@ -1111,7 +1111,7 @@ function copyuntil(out::IO, io::IO, target::AbstractString; keep::Bool=false)
 end
 
 function readuntil(io::IO, target::AbstractVector{T}; keep::Bool=false) where T
-    out = (T === UInt8 ? resize!(StringVector(70), 0) : Vector{T}())
+    out = (T === UInt8 ? resize!(StringVector(16), 0) : Vector{T}())
     readuntil_vector!(io, target, keep, out)
     return out
 end
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index 04a694a4fec15..c0c2731eec08b 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -42,7 +42,7 @@ end
 
 # allocate Vector{UInt8}s for IOBuffer storage that can efficiently become Strings
 StringMemory(n::Integer) = unsafe_wrap(Memory{UInt8}, _string_n(n))
-StringVector(n::Integer) = view(StringMemory(n), 1:n)::Vector{UInt8}
+StringVector(n::Integer) = wrap(Array, StringMemory(n))
 
 # IOBuffers behave like Files. They are typically readable and writable. They are seekable. (They can be appendable).
 
@@ -466,7 +466,7 @@ function take!(io::IOBuffer)
         if nbytes == 0 || io.reinit
             data = StringVector(0)
         elseif io.writable
-            data = view(io.data, io.offset+1:nbytes+io.offset)
+            data = wrap(Array, memoryref(io.data, io.offset + 1), nbytes)
         else
             data = copyto!(StringVector(nbytes), 1, io.data, io.offset + 1, nbytes)
         end
@@ -475,7 +475,7 @@ function take!(io::IOBuffer)
         if nbytes == 0
             data = StringVector(0)
         elseif io.writable
-            data = view(io.data, io.ptr:io.ptr+nbytes-1)
+            data = wrap(Array, memoryref(io.data, io.ptr), nbytes)
         else
             data = read!(io, data)
         end
@@ -501,7 +501,11 @@ state.  This should only be used internally for performance-critical
 It might save an allocation compared to `take!` (if the compiler elides the
 Array allocation), as well as omits some checks.
 """
-_unsafe_take!(io::IOBuffer) = view(io.data, io.offset+1:io.size)
+_unsafe_take!(io::IOBuffer) =
+    wrap(Array, io.size == io.offset ?
+        memoryref(Memory{UInt8}()) :
+        memoryref(io.data, io.offset + 1),
+        io.size - io.offset)
 
 function write(to::IO, from::GenericIOBuffer)
     written::Int = bytesavailable(from)
diff --git a/base/iostream.jl b/base/iostream.jl
index 40cf385acdd48..74908344e078e 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -47,17 +47,18 @@ macro _lock_ios(s, expr)
 end
 
 """
-    fd(stream)
+    fd(stream) -> RawFD
 
 Return the file descriptor backing the stream or file. Note that this function only applies
 to synchronous `File`'s and `IOStream`'s not to any of the asynchronous streams.
 
-File descriptors should typically be represented as [`RawFD`](@ref) objects, rather
-than as `Int`s, to ensure that they are properly interpreted by Julia functions.
+`RawFD` objects can be passed directly to other languages via the `ccall` interface.
 
-Note that `RawFD` objects can be passed directly to other languages via the `ccall` interface.
+!!! compat "Julia 1.12"
+    Prior to 1.12, this function returned an `Int` instead of a `RawFD`. You may use
+    `RawFD(fd(x))` to produce a `RawFD` in all Julia versions.
 """
-fd(s::IOStream) = Int(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios))
+fd(s::IOStream) = RawFD(ccall(:jl_ios_fd, Clong, (Ptr{Cvoid},), s.ios))
 
 stat(s::IOStream) = stat(fd(s))
 
@@ -229,8 +230,8 @@ end
 function filesize(s::IOStream)
     sz = @_lock_ios s ccall(:ios_filesize, Int64, (Ptr{Cvoid},), s.ios)
     if sz == -1
-        err = Libc.errno()
-        throw(IOError(string("filesize: ", Libc.strerror(err), " for ", s.name), err))
+        # if `s` is not seekable `ios_filesize` can fail, so fall back to slower stat method
+        sz = filesize(stat(s))
     end
     return sz
 end
diff --git a/base/irrationals.jl b/base/irrationals.jl
index eafe388162353..b3073c503238a 100644
--- a/base/irrationals.jl
+++ b/base/irrationals.jl
@@ -216,7 +216,7 @@ function irrational(sym, val, def)
     esym = esc(sym)
     qsym = esc(Expr(:quote, sym))
     bigconvert = isa(def,Symbol) ? quote
-        function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=MPFR.ROUNDING_MODE[]; precision=precision(BigFloat))
+        function Base.BigFloat(::Irrational{$qsym}, r::MPFR.MPFRRoundingMode=Rounding.rounding_raw(BigFloat); precision=precision(BigFloat))
             c = BigFloat(;precision=precision)
             ccall(($(string("mpfr_const_", def)), :libmpfr),
                   Cint, (Ref{BigFloat}, MPFR.MPFRRoundingMode), c, r)
diff --git a/base/libuv.jl b/base/libuv.jl
index 143201598fde0..306854e9f4436 100644
--- a/base/libuv.jl
+++ b/base/libuv.jl
@@ -26,10 +26,10 @@ for r in uv_req_types
 @eval const $(Symbol("_sizeof_", lowercase(string(r)))) = uv_sizeof_req($r)
 end
 
-uv_handle_data(handle) = ccall(:jl_uv_handle_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
-uv_req_data(handle) = ccall(:jl_uv_req_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
-uv_req_set_data(req, data) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data)
-uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:jl_uv_req_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data)
+uv_handle_data(handle) = ccall(:uv_handle_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
+uv_req_data(handle) = ccall(:uv_req_get_data, Ptr{Cvoid}, (Ptr{Cvoid},), handle)
+uv_req_set_data(req, data) = ccall(:uv_req_set_data, Cvoid, (Ptr{Cvoid}, Any), req, data)
+uv_req_set_data(req, data::Ptr{Cvoid}) = ccall(:uv_handle_set_data, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid}), req, data)
 
 macro handle_as(hand, typ)
     return quote
@@ -133,7 +133,10 @@ function uv_return_spawn end
 function uv_asynccb end
 function uv_timercb end
 
-function reinit_stdio()
+reinit_stdio() = _reinit_stdio()
+# we need this so it can be called by codegen to print errors, even after
+# reinit_stdio has been redefined by the juliac build script.
+function _reinit_stdio()
     global stdin = init_stdio(ccall(:jl_stdin_stream, Ptr{Cvoid}, ()))::IO
     global stdout = init_stdio(ccall(:jl_stdout_stream, Ptr{Cvoid}, ()))::IO
     global stderr = init_stdio(ccall(:jl_stderr_stream, Ptr{Cvoid}, ()))::IO
diff --git a/base/loading.jl b/base/loading.jl
index 20946969f0238..fe4a4770628da 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -264,11 +264,14 @@ const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing)
 LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set(), Dict(), Dict(), Dict())
 
 
-struct TOMLCache
-    p::TOML.Parser
+struct TOMLCache{Dates}
+    p::TOML.Parser{Dates}
     d::Dict{String, CachedTOMLDict}
 end
-const TOML_CACHE = TOMLCache(TOML.Parser(), Dict{String, Dict{String, Any}}())
+TOMLCache(p::TOML.Parser) = TOMLCache(p, Dict{String, CachedTOMLDict}())
+TOMLCache(p::TOML.Parser, d::Dict{String, Dict{String, Any}}) = TOMLCache(p, convert(Dict{String, CachedTOMLDict}, d))
+
+const TOML_CACHE = TOMLCache(TOML.Parser{nothing}())
 
 parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, require_lock)
 function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock)
@@ -505,6 +508,8 @@ package root.
 To get the root directory of the package that implements the current module
 the form `pkgdir(@__MODULE__)` can be used.
 
+If an extension module is given, the root of the parent package is returned.
+
 ```julia-repl
 julia> pkgdir(Foo)
 "/path/to/Foo.jl"
@@ -522,7 +527,19 @@ function pkgdir(m::Module, paths::String...)
     rootmodule = moduleroot(m)
     path = pathof(rootmodule)
     path === nothing && return nothing
-    return joinpath(dirname(dirname(path)), paths...)
+    original = path
+    path, base = splitdir(dirname(path))
+    if base == "src"
+        # package source in `../src/Foo.jl`
+    elseif base == "ext"
+        # extension source in `../ext/FooExt.jl`
+    elseif basename(path) == "ext"
+        # extension source in `../ext/FooExt/FooExt.jl`
+        path = dirname(path)
+    else
+        error("Unexpected path structure for module source: $original")
+    end
+    return joinpath(path, paths...)
 end
 
 function get_pkgversion_from_path(path)
@@ -818,14 +835,15 @@ end
 # given a project directory (implicit env from LOAD_PATH) and a name,
 # find an entry point for `name`, and see if it has an associated project file
 function entry_point_and_project_file(dir::String, name::String)::Union{Tuple{Nothing,Nothing},Tuple{String,Nothing},Tuple{String,String}}
-    path = normpath(joinpath(dir, "$name.jl"))
-    isfile_casesensitive(path) && return path, nothing
     dir_name = joinpath(dir, name)
     path, project_file = entry_point_and_project_file_inside(dir_name, name)
     path === nothing || return path, project_file
     dir_jl = dir_name * ".jl"
     path, project_file = entry_point_and_project_file_inside(dir_jl, name)
     path === nothing || return path, project_file
+    # check for less likely case with a bare file and no src directory last to minimize stat calls
+    path = normpath(joinpath(dir, "$name.jl"))
+    isfile_casesensitive(path) && return path, nothing
     return nothing, nothing
 end
 
@@ -1121,13 +1139,8 @@ function cache_file_entry(pkg::PkgId)
         uuid === nothing ? pkg.name : package_slug(uuid)
 end
 
-# for use during running the REPL precompilation subprocess script, given we don't
-# want it to pick up caches that already exist for other optimization levels
-const ignore_compiled_cache = PkgId[]
-
 function find_all_in_cache_path(pkg::PkgId, DEPOT_PATH::typeof(DEPOT_PATH)=DEPOT_PATH)
     paths = String[]
-    pkg in ignore_compiled_cache && return paths
     entrypath, entryfile = cache_file_entry(pkg)
     for path in DEPOT_PATH
         path = joinpath(path, entrypath)
@@ -1190,7 +1203,7 @@ const TIMING_IMPORTS = Threads.Atomic{Int}(0)
 # these return either the array of modules loaded from the path / content given
 # or an Exception that describes why it couldn't be loaded
 # and it reconnects the Base.Docs.META
-function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing)
+function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{Nothing, String}, depmods::Vector{Any}, ignore_native::Union{Nothing,Bool}=nothing; register::Bool=true)
     if isnothing(ignore_native)
         if JLOptions().code_coverage == 0 && JLOptions().malloc_log == 0
             ignore_native = false
@@ -1214,11 +1227,11 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
         t_comp_before = cumulative_compile_time_ns()
     end
 
-    for i in 1:length(depmods)
+    for i in eachindex(depmods)
         dep = depmods[i]
         dep isa Module && continue
         _, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
-        dep = loaded_precompiles[depkey => depbuild_id]
+        dep = something(maybe_loaded_precompile(depkey, depbuild_id))
         @assert PkgId(dep) == depkey && module_build_id(dep) === depbuild_id
         depmods[i] = dep
     end
@@ -1239,23 +1252,11 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
     for M in restored
         M = M::Module
         if parentmodule(M) === M && PkgId(M) == pkg
+            register && register_root_module(M)
             if timing_imports
-                elapsed = round((time_ns() - t_before) / 1e6, digits = 1)
+                elapsed_time = time_ns() - t_before
                 comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before
-                print(lpad(elapsed, 9), " ms  ")
-                parentid = get(EXT_PRIMED, pkg, nothing)
-                if parentid !== nothing
-                    print(parentid.name, " → ")
-                end
-                print(pkg.name)
-                if comp_time > 0
-                    printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color())
-                end
-                if recomp_time > 0
-                    perc = Float64(100 * recomp_time / comp_time)
-                    printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color())
-                end
-                println()
+                print_time_imports_report(M, elapsed_time, comp_time, recomp_time)
             end
             return M
         end
@@ -1267,8 +1268,76 @@ function _include_from_serialized(pkg::PkgId, path::String, ocachepath::Union{No
     end
 end
 
+# printing functions for @time_imports
+# note that the time inputs are UInt64 on all platforms. Give default values here so that we don't have
+# confusing UInt64 types in generate_precompile.jl
+function print_time_imports_report(
+        mod::Module,
+        elapsed_time::UInt64=UInt64(1),
+        comp_time::UInt64=UInt64(1),
+        recomp_time::UInt64=UInt64(1)
+    )
+    print(lpad(round(elapsed_time / 1e6, digits=1), 9), " ms  ")
+    ext_parent = extension_parent_name(mod)
+    if ext_parent !== nothing
+        print(ext_parent::String, " → ")
+    end
+    print(string(mod))
+    if comp_time > 0
+        perc = Ryu.writefixed(Float64(100 * comp_time / (elapsed_time)), 2)
+        printstyled(" $perc% compilation time", color = Base.info_color())
+    end
+    if recomp_time > 0
+        perc = Float64(100 * recomp_time / comp_time)
+        perc_show = perc < 1 ? "<1" : Ryu.writefixed(perc, 0)
+        printstyled(" ($perc_show% recompilation)", color = Base.warn_color())
+    end
+    println()
+end
+function print_time_imports_report_init(
+        mod::Module, i::Int=1,
+        elapsed_time::UInt64=UInt64(1),
+        comp_time::UInt64=UInt64(1),
+        recomp_time::UInt64=UInt64(1)
+    )
+    connector = i > 1 ? "├" : "┌"
+    printstyled("               $connector ", color = :light_black)
+    print("$(round(elapsed_time / 1e6, digits=1)) ms $mod.__init__() ")
+    if comp_time > 0
+        perc = Ryu.writefixed(Float64(100 * (comp_time) / elapsed_time), 2)
+        printstyled("$perc% compilation time", color = Base.info_color())
+    end
+    if recomp_time > 0
+        perc = Float64(100 * recomp_time / comp_time)
+        printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color())
+    end
+    println()
+end
+
+# if M is an extension, return the string name of the parent. Otherwise return nothing
+function extension_parent_name(M::Module)
+    rootmodule = moduleroot(M)
+    src_path = pathof(rootmodule)
+    src_path === nothing && return nothing
+    pkgdir_parts = splitpath(src_path)
+    ext_pos = findlast(==("ext"), pkgdir_parts)
+    if ext_pos !== nothing && ext_pos >= length(pkgdir_parts) - 2
+        parent_package_root = joinpath(pkgdir_parts[1:ext_pos-1]...)
+        parent_package_project_file = locate_project_file(parent_package_root)
+        if parent_package_project_file isa String
+            d = parsed_toml(parent_package_project_file)
+            name = get(d, "name", nothing)
+            if name !== nothing
+                return name
+            end
+        end
+    end
+    return nothing
+end
+
 function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String)
     # This function is also used by PkgCacheInspector.jl
+    assert_havelock(require_lock)
     restored = sv[1]::Vector{Any}
     for M in restored
         M = M::Module
@@ -1277,7 +1346,7 @@ function register_restored_modules(sv::SimpleVector, pkg::PkgId, path::String)
         end
         if parentmodule(M) === M
             push!(loaded_modules_order, M)
-            loaded_precompiles[pkg => module_build_id(M)] = M
+            push!(get!(Vector{Module}, loaded_precompiles, pkg), M)
         end
     end
 
@@ -1303,36 +1372,25 @@ function run_module_init(mod::Module, i::Int=1)
     # `i` informs ordering for the `@time_imports` report formatting
     if TIMING_IMPORTS[] == 0
         ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
-    else
-        if isdefined(mod, :__init__)
-            connector = i > 1 ? "├" : "┌"
-            printstyled("               $connector ", color = :light_black)
-
-            elapsedtime = time_ns()
-            cumulative_compile_timing(true)
-            compile_elapsedtimes = cumulative_compile_time_ns()
+    elseif isdefined(mod, :__init__)
+        elapsed_time = time_ns()
+        cumulative_compile_timing(true)
+        compile_elapsedtimes = cumulative_compile_time_ns()
 
-            ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
+        ccall(:jl_init_restored_module, Cvoid, (Any,), mod)
 
-            elapsedtime = (time_ns() - elapsedtime) / 1e6
-            cumulative_compile_timing(false);
-            comp_time, recomp_time = (cumulative_compile_time_ns() .- compile_elapsedtimes) ./ 1e6
+        elapsed_time = time_ns() - elapsed_time
+        cumulative_compile_timing(false);
+        comp_time, recomp_time = cumulative_compile_time_ns() .- compile_elapsedtimes
 
-            print("$(round(elapsedtime, digits=1)) ms $mod.__init__() ")
-            if comp_time > 0
-                printstyled(Ryu.writefixed(Float64(100 * comp_time / elapsedtime), 2), "% compilation time", color = Base.info_color())
-            end
-            if recomp_time > 0
-                perc = Float64(100 * recomp_time / comp_time)
-                printstyled(" ($(perc < 1 ? "<1" : Ryu.writefixed(perc, 0))% recompilation)", color = Base.warn_color())
-            end
-            println()
-        end
+        print_time_imports_report_init(mod, i, elapsed_time, comp_time, recomp_time)
     end
 end
 
 function run_package_callbacks(modkey::PkgId)
-    run_extension_callbacks(modkey)
+    if !precompiling_extension
+        run_extension_callbacks(modkey)
+    end
     assert_havelock(require_lock)
     unlock(require_lock)
     try
@@ -1448,7 +1506,7 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}
         triggers = triggers::Union{String, Vector{String}}
         triggers isa String && (triggers = [triggers])
         id = PkgId(uuid5(parent.uuid::UUID, ext), ext)
-        if id in keys(EXT_PRIMED) || haskey(Base.loaded_modules, id)
+        if haskey(EXT_PRIMED, id) || haskey(Base.loaded_modules, id)
             continue  # extension is already primed or loaded, don't add it again
         end
         EXT_PRIMED[id] = parent
@@ -1469,6 +1527,7 @@ function _insert_extension_triggers(parent::PkgId, extensions::Dict{String, Any}
     end
 end
 
+precompiling_package::Bool = false
 loading_extension::Bool = false
 precompiling_extension::Bool = false
 function run_extension_callbacks(extid::ExtensionId)
@@ -1706,8 +1765,7 @@ function compilecache_path(pkg::PkgId;
         end
         staledeps, _, _ = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
         # finish checking staledeps module graph
-        for i in 1:length(staledeps)
-            dep = staledeps[i]
+        for dep in staledeps
             dep isa Module && continue
             modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
             modpaths = find_all_in_cache_path(modkey)
@@ -1878,8 +1936,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String, ocachepath::Union
         depmods[i] = dep
     end
     # then load the file
-    loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native)
-    loaded isa Module && register_root_module(loaded)
+    loaded = _include_from_serialized(pkg, path, ocachepath, depmods, ignore_native; register = true)
     return loaded
 end
 
@@ -1889,92 +1946,102 @@ end
     assert_havelock(require_lock)
     paths = find_all_in_cache_path(pkg, DEPOT_PATH)
     newdeps = PkgId[]
-    for path_to_try in paths::Vector{String}
-        staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try; reasons, stalecheck)
-        if staledeps === true
-            continue
-        end
-        try
-            staledeps, ocachefile, newbuild_id = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
-            # finish checking staledeps module graph
-            for i in 1:length(staledeps)
-                dep = staledeps[i]
-                dep isa Module && continue
-                modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
-                modpaths = find_all_in_cache_path(modkey, DEPOT_PATH)
-                for modpath_to_try in modpaths
-                    modstaledeps = stale_cachefile(modkey, modbuild_id, modpath, modpath_to_try; stalecheck)
-                    if modstaledeps === true
-                        continue
-                    end
-                    modstaledeps, modocachepath, _ = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
-                    staledeps[i] = (modpath, modkey, modbuild_id, modpath_to_try, modstaledeps, modocachepath)
-                    @goto check_next_dep
+    try_build_ids = UInt128[build_id]
+    if build_id == UInt128(0)
+        let loaded = get(loaded_precompiles, pkg, nothing)
+            if loaded !== nothing
+                for mod in loaded # try these in reverse original load order to see if one is already valid
+                    pushfirst!(try_build_ids, module_build_id(mod))
                 end
-                @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
-                @goto check_next_path
-                @label check_next_dep
             end
-            M = get(loaded_precompiles, pkg => newbuild_id, nothing)
-            if isa(M, Module)
-                stalecheck && register_root_module(M)
-                return M
-            end
-            if stalecheck
-                try
-                    touch(path_to_try) # update timestamp of precompilation file
-                catch ex # file might be read-only and then we fail to update timestamp, which is fine
-                    ex isa IOError || rethrow()
-                end
+        end
+    end
+    for build_id in try_build_ids
+        for path_to_try in paths::Vector{String}
+            staledeps = stale_cachefile(pkg, build_id, sourcepath, path_to_try; reasons, stalecheck)
+            if staledeps === true
+                continue
             end
-            # finish loading module graph into staledeps
-            # TODO: call all start_loading calls (in reverse order) before calling any _include_from_serialized, since start_loading will drop the loading lock
-            for i in 1:length(staledeps)
-                dep = staledeps[i]
-                dep isa Module && continue
-                modpath, modkey, modbuild_id, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, UInt128, String, Vector{Any}, Union{Nothing, String}}
-                dep = start_loading(modkey, modbuild_id, stalecheck)
-                while true
-                    if dep isa Module
-                        if PkgId(dep) == modkey && module_build_id(dep) === modbuild_id
-                            break
-                        else
-                            @debug "Rejecting cache file $path_to_try because module $modkey got loaded at a different version than expected."
-                            @goto check_next_path
+            try
+                staledeps, ocachefile, newbuild_id = staledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
+                # finish checking staledeps module graph
+                for i in eachindex(staledeps)
+                    dep = staledeps[i]
+                    dep isa Module && continue
+                    modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
+                    modpaths = find_all_in_cache_path(modkey, DEPOT_PATH)
+                    for modpath_to_try in modpaths
+                        modstaledeps = stale_cachefile(modkey, modbuild_id, modpath, modpath_to_try; stalecheck)
+                        if modstaledeps === true
+                            continue
                         end
+                        modstaledeps, modocachepath, _ = modstaledeps::Tuple{Vector{Any}, Union{Nothing, String}, UInt128}
+                        staledeps[i] = (modpath, modkey, modbuild_id, modpath_to_try, modstaledeps, modocachepath)
+                        @goto check_next_dep
+                    end
+                    @debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
+                    @goto check_next_path
+                    @label check_next_dep
+                end
+                M = maybe_loaded_precompile(pkg, newbuild_id)
+                if isa(M, Module)
+                    stalecheck && register_root_module(M)
+                    return M
+                end
+                if stalecheck
+                    try
+                        touch(path_to_try) # update timestamp of precompilation file
+                    catch ex # file might be read-only and then we fail to update timestamp, which is fine
+                        ex isa IOError || rethrow()
                     end
-                    if dep === nothing
-                        try
-                            set_pkgorigin_version_path(modkey, modpath)
-                            dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps)
-                            dep isa Module && stalecheck && register_root_module(dep)
-                        finally
-                            end_loading(modkey, dep)
+                end
+                # finish loading module graph into staledeps
+                # TODO: call all start_loading calls (in reverse order) before calling any _include_from_serialized, since start_loading will drop the loading lock
+                for i in eachindex(staledeps)
+                    dep = staledeps[i]
+                    dep isa Module && continue
+                    modpath, modkey, modbuild_id, modcachepath, modstaledeps, modocachepath = dep::Tuple{String, PkgId, UInt128, String, Vector{Any}, Union{Nothing, String}}
+                    dep = start_loading(modkey, modbuild_id, stalecheck)
+                    while true
+                        if dep isa Module
+                            if PkgId(dep) == modkey && module_build_id(dep) === modbuild_id
+                                break
+                            else
+                                @debug "Rejecting cache file $path_to_try because module $modkey got loaded at a different version than expected."
+                                @goto check_next_path
+                            end
                         end
-                        if !isa(dep, Module)
-                            @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep
-                            @goto check_next_path
-                        else
-                            push!(newdeps, modkey)
+                        if dep === nothing
+                            try
+                                set_pkgorigin_version_path(modkey, modpath)
+                                dep = _include_from_serialized(modkey, modcachepath, modocachepath, modstaledeps; register = stalecheck)
+                            finally
+                                end_loading(modkey, dep)
+                            end
+                            if !isa(dep, Module)
+                                @debug "Rejecting cache file $path_to_try because required dependency $modkey failed to load from cache file for $modcachepath." exception=dep
+                                @goto check_next_path
+                            else
+                                push!(newdeps, modkey)
+                            end
                         end
                     end
+                    staledeps[i] = dep
                 end
-                staledeps[i] = dep
-            end
-            restored = get(loaded_precompiles, pkg => newbuild_id, nothing)
-            if !isa(restored, Module)
-                restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps)
-            end
-            isa(restored, Module) && stalecheck && register_root_module(restored)
-            isa(restored, Module) && return restored
-            @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
-            @label check_next_path
-        finally
-            for modkey in newdeps
-                insert_extension_triggers(modkey)
-                stalecheck && run_package_callbacks(modkey)
+                restored = maybe_loaded_precompile(pkg, newbuild_id)
+                if !isa(restored, Module)
+                    restored = _include_from_serialized(pkg, path_to_try, ocachefile, staledeps; register = stalecheck)
+                end
+                isa(restored, Module) && return restored
+                @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
+                @label check_next_path
+            finally
+                for modkey in newdeps
+                    insert_extension_triggers(modkey)
+                    stalecheck && run_package_callbacks(modkey)
+                end
+                empty!(newdeps)
             end
-            empty!(newdeps)
         end
     end
     return nothing
@@ -1993,7 +2060,7 @@ function start_loading(modkey::PkgId, build_id::UInt128, stalecheck::Bool)
         loaded = stalecheck ? maybe_root_module(modkey) : nothing
         loaded isa Module && return loaded
         if build_id != UInt128(0)
-            loaded = get(loaded_precompiles, modkey => build_id, nothing)
+            loaded = maybe_loaded_precompile(modkey, build_id)
             loaded isa Module && return loaded
         end
         loading = get(package_locks, modkey, nothing)
@@ -2164,6 +2231,11 @@ For more details regarding code loading, see the manual sections on [modules](@r
 [parallel computing](@ref code-availability).
 """
 function require(into::Module, mod::Symbol)
+    if into === Base.__toplevel__ && precompiling_package
+        # this error type needs to match the error type compilecache throws for non-125 errors.
+        error("`using/import $mod` outside of a Module detected. Importing a package outside of a module \
+         is not allowed during package precompilation.")
+    end
     if _require_world_age[] != typemax(UInt)
         Base.invoke_in_world(_require_world_age[], __require, into, mod)
     else
@@ -2318,12 +2390,22 @@ const pkgorigins = Dict{PkgId,PkgOrigin}()
 
 const explicit_loaded_modules = Dict{PkgId,Module}() # Emptied on Julia start
 const loaded_modules = Dict{PkgId,Module}() # available to be explicitly loaded
-const loaded_precompiles = Dict{Pair{PkgId,UInt128},Module}() # extended (complete) list of modules, available to be loaded
+const loaded_precompiles = Dict{PkgId,Vector{Module}}() # extended (complete) list of modules, available to be loaded
 const loaded_modules_order = Vector{Module}()
 const module_keys = IdDict{Module,PkgId}() # the reverse of loaded_modules
 
 root_module_key(m::Module) = @lock require_lock module_keys[m]
 
+function maybe_loaded_precompile(key::PkgId, buildid::UInt128)
+    @lock require_lock begin
+    mods = get(loaded_precompiles, key, nothing)
+    mods === nothing && return
+    for mod in mods
+        module_build_id(mod) == buildid && return mod
+    end
+    end
+end
+
 function module_build_id(m::Module)
     hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
     return (UInt128(hi) << 64) | lo
@@ -2344,7 +2426,7 @@ end
             end
         end
     end
-    haskey(loaded_precompiles, key => module_build_id(m)) || push!(loaded_modules_order, m)
+    maybe_loaded_precompile(key, module_build_id(m)) === nothing && push!(loaded_modules_order, m)
     loaded_modules[key] = m
     explicit_loaded_modules[key] = m
     module_keys[m] = key
@@ -2529,6 +2611,46 @@ function _require_from_serialized(uuidkey::PkgId, path::String, ocachepath::Unio
 end
 
 # load a serialized file directly from append_bundled_depot_path for uuidkey without stalechecks
+"""
+    require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}=nothing)
+
+!!! warning "May load duplicate copies of stdlib packages."
+
+    This requires that all stdlib packages loaded are compatible with having concurrent
+    copies of themselves loaded into memory. It also places additional restrictions on
+    the kinds of type-piracy that are allowed in stdlibs, since type-piracy can cause the
+    dispatch table to become visibly "torn" across multiple different packages.
+
+    The specific requirements are:
+
+      The import side (caller of `require_stdlib`) must not leak any stdlib types, esp.
+      to any context that may have a conflicting copy of the stdlib(s) (or vice-versa).
+         - e.g., if an output is forwarded to user code, it must contain only Base types.
+         - e.g., if an output contains types from the stdlib, it must be consumed "internally"
+                 before reaching user code.
+
+      The imported code (loaded stdlibs) must be very careful about type piracy:
+         - It must not access any global state that may differ between stdlib copies in
+           type-pirated methods.
+         - It must not return any stdlib types from any type-pirated public methods (since
+           a loaded duplicate would overwrite the Base method again, returning different
+           types that don't correspond to the user-accessible copy of the stdlib).
+         - It must not pass / discriminate stdlib types in type-pirated methods, except
+           indirectly via methods defined in Base and implemented (w/o type-piracy) in
+           all copies of the stdlib over their respective types.
+
+      The idea behind the above restrictions is that any type-pirated methods in the stdlib
+      must return a result that is simultaneously correct for all of the stdlib's loaded
+      copies, including accounting for global state differences and split type identities.
+
+      Furthermore, any imported code must not leak any stdlib types to globals and containers
+      (e.g. Vectors and mutable structs) in upstream Modules, since this will also lead to
+      type-confusion when the type is later pulled out in user / stdlib code.
+
+    For examples of issues like the above, see:
+      [1] https://github.com/JuliaLang/Pkg.jl/issues/4017#issuecomment-2377589989
+      [2] https://github.com/JuliaLang/StyledStrings.jl/issues/91#issuecomment-2379602914
+"""
 function require_stdlib(package_uuidkey::PkgId, ext::Union{Nothing, String}=nothing)
     @lock require_lock begin
     # the PkgId of the ext, or package if not an ext
@@ -2716,8 +2838,8 @@ function evalfile(path::AbstractString, args::Vector{String}=String[])
         Expr(:toplevel,
              :(const ARGS = $args),
              :(eval(x) = $(Expr(:core, :eval))(__anon__, x)),
-             :(include(x) = $(Expr(:top, :include))(__anon__, x)),
-             :(include(mapexpr::Function, x) = $(Expr(:top, :include))(mapexpr, __anon__, x)),
+             :(include(x::AbstractString) = $(Expr(:top, :include))(__anon__, x)),
+             :(include(mapexpr::Function, x::AbstractString) = $(Expr(:top, :include))(mapexpr, __anon__, x)),
              :(include($path))))
 end
 evalfile(path::AbstractString, args::Vector) = evalfile(path, String[args...])
@@ -2741,41 +2863,10 @@ function load_path_setup_code(load_path::Bool=true)
     return code
 end
 
-"""
-    check_src_module_wrap(srcpath::String)
-
-Checks that a package entry file `srcpath` has a module declaration, and that it is before any using/import statements.
-"""
-function check_src_module_wrap(pkg::PkgId, srcpath::String)
-    module_rgx = r"^(|end |\"\"\" )\s*(?:@)*(?:bare)?module\s"
-    load_rgx = r"\b(?:using|import)\s"
-    load_seen = false
-    inside_string = false
-    for s in eachline(srcpath)
-        if count("\"\"\"", s) == 1
-            # ignore module docstrings
-            inside_string = !inside_string
-        end
-        inside_string && continue
-        if contains(s, module_rgx)
-            if load_seen
-                throw(ErrorException("Package $(repr("text/plain", pkg)) source file $srcpath has a using/import before a module declaration."))
-            end
-            return true
-        end
-        if startswith(s, load_rgx)
-            load_seen = true
-        end
-    end
-    throw(ErrorException("Package $(repr("text/plain", pkg)) source file $srcpath does not contain a module declaration."))
-end
-
 # this is called in the external process that generates precompiled package files
 function include_package_for_output(pkg::PkgId, input::String, depot_path::Vector{String}, dl_load_path::Vector{String}, load_path::Vector{String},
                                     concrete_deps::typeof(_concrete_dependencies), source::Union{Nothing,String})
 
-    check_src_module_wrap(pkg, input)
-
     append!(empty!(Base.DEPOT_PATH), depot_path)
     append!(empty!(Base.DL_LOAD_PATH), dl_load_path)
     append!(empty!(Base.LOAD_PATH), load_path)
@@ -2802,11 +2893,22 @@ function include_package_for_output(pkg::PkgId, input::String, depot_path::Vecto
     finally
         Core.Compiler.track_newly_inferred.x = false
     end
+    # check that the package defined the expected module so we can give a nice error message if not
+    Base.check_package_module_loaded(pkg)
+end
+
+function check_package_module_loaded(pkg::PkgId)
+    if !haskey(Base.loaded_modules, pkg)
+        # match compilecache error type for non-125 errors
+        error("$(repr("text/plain", pkg)) did not define the expected module `$(pkg.name)`, \
+            check for typos in package module name")
+    end
+    return nothing
 end
 
 const PRECOMPILE_TRACE_COMPILE = Ref{String}()
 function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::Union{Nothing, String},
-                           concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout)
+                           concrete_deps::typeof(_concrete_dependencies), flags::Cmd=``, internal_stderr::IO = stderr, internal_stdout::IO = stdout, isext::Bool=false)
     @nospecialize internal_stderr internal_stdout
     rm(output, force=true)   # Remove file if it exists
     output_o === nothing || rm(output_o, force=true)
@@ -2875,7 +2977,8 @@ function create_expr_cache(pkg::PkgId, input::String, output::String, output_o::
     write(io.in, """
         empty!(Base.EXT_DORMITORY) # If we have a custom sysimage with `EXT_DORMITORY` prepopulated
         Base.track_nested_precomp($precomp_stack)
-        Base.precompiling_extension = $(loading_extension)
+        Base.precompiling_extension = $(loading_extension | isext)
+        Base.precompiling_package = true
         Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)),
             $(repr(load_path)), $deps, $(repr(source_path(nothing))))
         """)
@@ -2932,18 +3035,18 @@ This can be used to reduce package load times. Cache files are stored in
 `DEPOT_PATH[1]/compiled`. See [Module initialization and precompilation](@ref)
 for important notes.
 """
-function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}())
+function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout; flags::Cmd=``, reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), isext::Bool=false)
     @nospecialize internal_stderr internal_stdout
     path = locate_package(pkg)
     path === nothing && throw(ArgumentError("$(repr("text/plain", pkg)) not found during precompilation"))
-    return compilecache(pkg, path, internal_stderr, internal_stdout; flags, reasons)
+    return compilecache(pkg, path, internal_stderr, internal_stdout; flags, reasons, isext)
 end
 
 const MAX_NUM_PRECOMPILE_FILES = Ref(10)
 
 function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout,
                       keep_loaded_modules::Bool = true; flags::Cmd=``, cacheflags::CacheFlags=CacheFlags(),
-                      reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}())
+                      reasons::Union{Dict{String,Int},Nothing}=Dict{String,Int}(), isext::Bool=false)
 
     @nospecialize internal_stderr internal_stdout
     # decide where to put the resulting cache file
@@ -2983,7 +3086,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
             close(tmpio_o)
             close(tmpio_so)
         end
-        p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout)
+        p = create_expr_cache(pkg, path, tmppath, tmppath_o, concrete_deps, flags, internal_stderr, internal_stdout, isext)
 
         if success(p)
             if cache_objects
@@ -3048,7 +3151,9 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
                 end
             end
             # this is atomic according to POSIX (not Win32):
-            rename(tmppath, cachefile; force=true)
+            # but force=true means it will fall back to non atomic
+            # move if the initial rename fails.
+            mv(tmppath, cachefile; force=true)
             return cachefile, ocachefile
         end
     finally
@@ -3067,7 +3172,7 @@ end
 
 function rename_unique_ocachefile(tmppath_so::String, ocachefile_orig::String, ocachefile::String = ocachefile_orig, num = 0)
     try
-        rename(tmppath_so, ocachefile; force=true)
+        mv(tmppath_so, ocachefile; force=true)
     catch e
         e isa IOError || rethrow()
         # If `rm` was called on a dir containing a loaded DLL, we moved it to temp for cleanup
@@ -3123,16 +3228,9 @@ mutable struct CacheHeaderIncludes
     const modpath::Vector{String}   # seemingly not needed in Base, but used by Revise
 end
 
-function replace_depot_path(path::AbstractString)
-    for depot in DEPOT_PATH
-        !isdir(depot) && continue
-
-        # Strip extraneous pathseps through normalization.
-        if isdirpath(depot)
-            depot = dirname(depot)
-        end
-
-        if startswith(path, depot)
+function replace_depot_path(path::AbstractString, depots::Vector{String}=normalize_depots_for_relocation())
+    for depot in depots
+        if startswith(path, string(depot, Filesystem.pathsep())) || path == depot
             path = replace(path, depot => "@depot"; count=1)
             break
         end
@@ -3140,6 +3238,19 @@ function replace_depot_path(path::AbstractString)
     return path
 end
 
+function normalize_depots_for_relocation()
+    depots = String[]
+    sizehint!(depots, length(DEPOT_PATH))
+    for d in DEPOT_PATH
+        isdir(d) || continue
+        if isdirpath(d)
+            d = dirname(d)
+        end
+        push!(depots, abspath(d))
+    end
+    return depots
+end
+
 function restore_depot_path(path::AbstractString, depot::AbstractString)
     replace(path, r"^@depot" => depot; count=1)
 end
@@ -3665,7 +3776,13 @@ end
                                           ignore_loaded::Bool=false, requested_flags::CacheFlags=CacheFlags(),
                                           reasons::Union{Dict{String,Int},Nothing}=nothing, stalecheck::Bool=true)
     # n.b.: this function does nearly all of the file validation, not just those checks related to stale, so the name is potentially unclear
-    io = open(cachefile, "r")
+    io = try
+        open(cachefile, "r")
+    catch ex
+        ex isa IOError || ex isa SystemError || rethrow()
+        @debug "Rejecting cache file $cachefile for $modkey because it could not be opened" isfile(cachefile)
+        return true
+    end
     try
         checksum = isvalid_cache_header(io)
         if iszero(checksum)
@@ -3735,8 +3852,8 @@ end
         for i in 1:ndeps
             req_key, req_build_id = required_modules[i]
             # Check if module is already loaded
-            if !stalecheck && haskey(loaded_precompiles, req_key => req_build_id)
-                M = loaded_precompiles[req_key => req_build_id]
+            M = stalecheck ? nothing : maybe_loaded_precompile(req_key, req_build_id)
+            if M !== nothing
                 @assert PkgId(M) == req_key && module_build_id(M) === req_build_id
                 depmods[i] = M
             elseif root_module_exists(req_key)
@@ -3788,10 +3905,17 @@ end
 
         # now check if this file's content hash has changed relative to its source files
         if stalecheck
-            if !samefile(includes[1].filename, modpath) && !samefile(fixup_stdlib_path(includes[1].filename), modpath)
-                @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath"
-                record_reason(reasons, "wrong source")
-                return true # cache file was compiled from a different path
+            if !samefile(includes[1].filename, modpath)
+                # In certain cases the path rewritten by `fixup_stdlib_path` may
+                # point to an unreadable directory, make sure we can `stat` the
+                # file before comparing it with `modpath`.
+                stdlib_path = fixup_stdlib_path(includes[1].filename)
+                if !(isreadable(stdlib_path) && samefile(stdlib_path, modpath))
+                    !samefile(fixup_stdlib_path(includes[1].filename), modpath)
+                    @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath"
+                    record_reason(reasons, "wrong source")
+                    return true # cache file was compiled from a different path
+                end
             end
             for (modkey, req_modkey) in requires
                 # verify that `require(modkey, name(req_modkey))` ==> `req_modkey`
@@ -3811,7 +3935,7 @@ end
                 end
                 if !ispath(f)
                     _f = fixup_stdlib_path(f)
-                    if isfile(_f) && startswith(_f, Sys.STDLIB)
+                    if _f != f && isfile(_f) && startswith(_f, Sys.STDLIB)
                         continue
                     end
                     @debug "Rejecting stale cache file $cachefile because file $f does not exist"
@@ -3833,13 +3957,14 @@ end
                         return true
                     end
                 else
-                    fsize = filesize(f)
+                    fstat = stat(f)
+                    fsize = filesize(fstat)
                     if fsize != fsize_req
                         @debug "Rejecting stale cache file $cachefile because file size of $f has changed (file size $fsize, before $fsize_req)"
                         record_reason(reasons, "include_dependency fsize change")
                         return true
                     end
-                    hash = isdir(f) ? _crc32c(join(readdir(f))) : open(_crc32c, f, "r")
+                    hash = isdir(fstat) ? _crc32c(join(readdir(f))) : open(_crc32c, f, "r")
                     if hash != hash_req
                         @debug "Rejecting stale cache file $cachefile because hash of $f has changed (hash $hash, before $hash_req)"
                         record_reason(reasons, "include_dependency fhash change")
diff --git a/base/lock.jl b/base/lock.jl
index 7cbb023a78ee4..b473045e5809d 100644
--- a/base/lock.jl
+++ b/base/lock.jl
@@ -51,6 +51,20 @@ end
 
 assert_havelock(l::ReentrantLock) = assert_havelock(l, l.locked_by)
 
+show(io::IO, ::ReentrantLock) = print(io, ReentrantLock, "()")
+
+function show(io::IO, ::MIME"text/plain", l::ReentrantLock)
+    show(io, l)
+    if !(get(io, :compact, false)::Bool)
+        locked_by = l.locked_by
+        if locked_by isa Task
+            print(io, " (locked by ", locked_by === current_task() ? "current " : "", locked_by, ")")
+        else
+            print(io, " (unlocked)")
+        end
+    end
+end
+
 """
     islocked(lock) -> Status (Boolean)
 
@@ -498,10 +512,10 @@ This provides an acquire & release memory ordering on notify/wait.
     The `autoreset` functionality and memory ordering guarantee requires at least Julia 1.8.
 """
 mutable struct Event
-    const notify::ThreadSynchronizer
+    const notify::Threads.Condition
     const autoreset::Bool
     @atomic set::Bool
-    Event(autoreset::Bool=false) = new(ThreadSynchronizer(), autoreset, false)
+    Event(autoreset::Bool=false) = new(Threads.Condition(), autoreset, false)
 end
 
 function wait(e::Event)
diff --git a/base/logging/logging.jl b/base/logging/logging.jl
index 7124ffe25abf2..5cf3882a300ec 100644
--- a/base/logging/logging.jl
+++ b/base/logging/logging.jl
@@ -3,7 +3,7 @@
 module CoreLogging
 
 import Base: isless, +, -, convert, show
-import Base: ScopedValue, with, @with
+import Base.ScopedValues: ScopedValue, with, @with
 
 export
     AbstractLogger,
@@ -60,7 +60,7 @@ function min_enabled_level end
     catch_exceptions(logger)
 
 Return `true` if the logger should catch exceptions which happen during log
-record construction.  By default, messages are caught
+record construction.  By default, messages are caught.
 
 By default all exceptions are caught to prevent log message generation from
 crashing the program.  This lets users confidently toggle little-used
diff --git a/base/math.jl b/base/math.jl
index de275a2afc048..16a8a547e8de1 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -23,7 +23,7 @@ import .Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
 using .Base: sign_mask, exponent_mask, exponent_one,
             exponent_half, uinttype, significand_mask,
             significand_bits, exponent_bits, exponent_bias,
-            exponent_max, exponent_raw_max
+            exponent_max, exponent_raw_max, clamp, clamp!
 
 using Core.Intrinsics: sqrt_llvm
 
@@ -69,104 +69,6 @@ end
     return Txy, T(xy-Txy)
 end
 
-"""
-    clamp(x, lo, hi)
-
-Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
-are promoted to a common type.
-
-See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
-
-!!! compat "Julia 1.3"
-    `missing` as the first argument requires at least Julia 1.3.
-
-# Examples
-```jldoctest
-julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
-3-element Vector{BigFloat}:
- 3.141592653589793238462643383279502884197169399375105820974944592307816406286198
- 2.0
- 9.0
-
-julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
-3-element Vector{Int64}:
-  6
-  6
- 10
-```
-"""
-function clamp(x::X, lo::L, hi::H) where {X,L,H}
-    T = promote_type(X, L, H)
-    return (x > hi) ? convert(T, hi) : (x < lo) ? convert(T, lo) : convert(T, x)
-end
-
-"""
-    clamp(x, T)::T
-
-Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
-
-See also [`trunc`](@ref).
-
-# Examples
-```jldoctest
-julia> clamp(200, Int8)
-127
-
-julia> clamp(-200, Int8)
--128
-
-julia> trunc(Int, 4pi^2)
-39
-```
-"""
-function clamp(x, ::Type{T}) where {T<:Integer}
-    # delegating to clamp(x, typemin(T), typemax(T)) would promote types
-    # this way, we avoid unnecessary conversions
-    # think of, e.g., clamp(big(2) ^ 200, Int16)
-    lo = typemin(T)
-    hi = typemax(T)
-    return (x > hi) ? hi : (x < lo) ? lo : convert(T, x)
-end
-
-
-"""
-    clamp!(array::AbstractArray, lo, hi)
-
-Restrict values in `array` to the specified range, in-place.
-See also [`clamp`](@ref).
-
-!!! compat "Julia 1.3"
-    `missing` entries in `array` require at least Julia 1.3.
-
-# Examples
-```jldoctest
-julia> row = collect(-4:4)';
-
-julia> clamp!(row, 0, Inf)
-1×9 adjoint(::Vector{Int64}) with eltype Int64:
- 0  0  0  0  0  1  2  3  4
-
-julia> clamp.((-4:4)', 0, Inf)
-1×9 Matrix{Float64}:
- 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
-```
-"""
-function clamp!(x::AbstractArray, lo, hi)
-    @inbounds for i in eachindex(x)
-        x[i] = clamp(x[i], lo, hi)
-    end
-    x
-end
-
-"""
-    clamp(x::Integer, r::AbstractUnitRange)
-
-Clamp `x` to lie within range `r`.
-
-!!! compat "Julia 1.6"
-     This method requires at least Julia 1.6.
-"""
-clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r))
 
 """
     evalpoly(x, p)
@@ -1374,14 +1276,12 @@ end
     return ifelse(isfinite(x) & isfinite(err), muladd(x, y, err), x*y)
 end
 
-function ^(x::Float32, n::Integer)
+function ^(x::Union{Float16,Float32}, n::Integer)
     n == -2 && return (i=inv(x); i*i)
     n == 3 && return x*x*x #keep compatibility with literal_pow
-    n < 0 && return Float32(Base.power_by_squaring(inv(Float64(x)),-n))
-    Float32(Base.power_by_squaring(Float64(x),n))
+    n < 0 && return oftype(x, Base.power_by_squaring(inv(widen(x)),-n))
+    oftype(x, Base.power_by_squaring(widen(x),n))
 end
-@inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ y)
-@inline literal_pow(::typeof(^), x::Float16, ::Val{p}) where {p} = Float16(literal_pow(^,Float32(x),Val(p)))
 
 ## rem2pi-related calculations ##
 
@@ -1690,7 +1590,6 @@ end
 
 exp2(x::AbstractFloat) = 2^x
 exp10(x::AbstractFloat) = 10^x
-clamp(::Missing, lo, hi) = missing
 fourthroot(::Missing) = missing
 
 end # module
diff --git a/base/missing.jl b/base/missing.jl
index ce174edc297e3..1f34195efed88 100644
--- a/base/missing.jl
+++ b/base/missing.jl
@@ -135,6 +135,7 @@ min(::Any,     ::Missing) = missing
 max(::Missing, ::Missing) = missing
 max(::Missing, ::Any)     = missing
 max(::Any,     ::Missing) = missing
+clamp(::Missing, lo, hi) = missing
 
 missing_conversion_msg(@nospecialize T) =
     LazyString("cannot convert a missing value to type ", T, ": use Union{", T, ", Missing} instead")
diff --git a/base/mpfr.jl b/base/mpfr.jl
index ed3ea5937ce87..9d1a0843ebe06 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -18,12 +18,10 @@ import
         setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
         isone, big, _string_n, decompose, minmax, _precision_with_base_2,
         sinpi, cospi, sincospi, tanpi, sind, cosd, tand, asind, acosd, atand,
-        uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask,
-        RawBigIntRoundingIncrementHelper, truncated, RawBigInt
-
+        uinttype, exponent_max, exponent_min, ieee754_representation, significand_mask
 
 using .Base.Libc
-import ..Rounding:
+import ..Rounding: Rounding,
     rounding_raw, setrounding_raw, rounds_to_nearest, rounds_away_from_zero,
     tie_breaker_is_to_even, correct_rounding_requires_increment
 
@@ -39,7 +37,6 @@ else
     const libmpfr = "libmpfr.so.6"
 end
 
-
 version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Ptr{Cchar}, ())))
 patches() = split(unsafe_string(ccall((:mpfr_get_patches,libmpfr), Ptr{Cchar}, ())),' ')
 
@@ -109,9 +106,9 @@ end
 tie_breaker_is_to_even(::MPFRRoundingMode) = true
 
 const ROUNDING_MODE = Ref{MPFRRoundingMode}(MPFRRoundNearest)
-const CURRENT_ROUNDING_MODE = Base.ScopedValue{MPFRRoundingMode}()
+const CURRENT_ROUNDING_MODE = Base.ScopedValues.ScopedValue{MPFRRoundingMode}()
 const DEFAULT_PRECISION = Ref{Clong}(256)
-const CURRENT_PRECISION = Base.ScopedValue{Clong}()
+const CURRENT_PRECISION = Base.ScopedValues.ScopedValue{Clong}()
 # Basic type and initialization definitions
 
 # Warning: the constants are MPFR implementation details from
@@ -120,69 +117,129 @@ const mpfr_special_exponent_zero = typemin(Clong) + true
 const mpfr_special_exponent_nan = mpfr_special_exponent_zero + true
 const mpfr_special_exponent_inf = mpfr_special_exponent_nan + true
 
+struct BigFloatLayout
+    prec::Clong
+    sign::Cint
+    exp::Clong
+    d::Ptr{Limb}
+    # possible padding
+    p::Limb # Tuple{Vararg{Limb}}
+end
+const offset_prec = fieldoffset(BigFloatLayout, 1) % Int
+const offset_sign = fieldoffset(BigFloatLayout, 2) % Int
+const offset_exp = fieldoffset(BigFloatLayout, 3) % Int
+const offset_d = fieldoffset(BigFloatLayout, 4) % Int
+const offset_p_limbs = ((fieldoffset(BigFloatLayout, 5) % Int + sizeof(Limb) - 1) ÷ sizeof(Limb))
+const offset_p = offset_p_limbs * sizeof(Limb)
+
 """
     BigFloat <: AbstractFloat
 
 Arbitrary precision floating point number type.
 """
-mutable struct BigFloat <: AbstractFloat
-    prec::Clong
-    sign::Cint
-    exp::Clong
-    d::Ptr{Limb}
-    # _d::Buffer{Limb} # Julia gc handle for memory @ d
-    _d::String # Julia gc handle for memory @ d (optimized)
+struct BigFloat <: AbstractFloat
+    d::Memory{Limb}
 
     # Not recommended for general use:
     # used internally by, e.g. deepcopy
-    global function _BigFloat(prec::Clong, sign::Cint, exp::Clong, d::String)
-        # ccall-based version, inlined below
-        #z = new(zero(Clong), zero(Cint), zero(Clong), C_NULL, d)
-        #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), d, prec) # currently seems to be a no-op in mpfr
-        #NAN_KIND = Cint(0)
-        #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, d)
-        #return z
-        return new(prec, sign, exp, pointer(d), d)
-    end
+    global _BigFloat(d::Memory{Limb}) = new(d)
 
     function BigFloat(; precision::Integer=_precision_with_base_2(BigFloat))
         precision < 1 && throw(DomainError(precision, "`precision` cannot be less than 1."))
         nb = ccall((:mpfr_custom_get_size,libmpfr), Csize_t, (Clong,), precision)
-        nb = (nb + Core.sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this
-        #d = Vector{Limb}(undef, nb)
-        d = _string_n(nb * Core.sizeof(Limb))
-        EXP_NAN = mpfr_special_exponent_nan
-        return _BigFloat(Clong(precision), one(Cint), EXP_NAN, d) # +NAN
+        nl = (nb + offset_p + sizeof(Limb) - 1) ÷ Core.sizeof(Limb) # align to number of Limb allocations required for this
+        d = Memory{Limb}(undef, nl % Int)
+        # ccall-based version, inlined below
+        z = _BigFloat(d) # initialize to +NAN
+        #ccall((:mpfr_custom_init,libmpfr), Cvoid, (Ptr{Limb}, Clong), BigFloatData(d), prec) # currently seems to be a no-op in mpfr
+        #NAN_KIND = Cint(0)
+        #ccall((:mpfr_custom_init_set,libmpfr), Cvoid, (Ref{BigFloat}, Cint, Clong, Ptr{Limb}), z, NAN_KIND, prec, BigFloatData(d))
+        z.prec = Clong(precision)
+        z.sign = one(Cint)
+        z.exp = mpfr_special_exponent_nan
+        return z
     end
 end
 
-# The rounding mode here shouldn't matter.
-significand_limb_count(x::BigFloat) = div(sizeof(x._d), sizeof(Limb), RoundToZero)
+"""
+Segment of raw words of bits interpreted as a big integer. Less
+significant words come first. Each word is in machine-native bit-order.
+"""
+struct BigFloatData{Limb}
+    d::Memory{Limb}
+end
+
+# BigFloat interface
+@inline function Base.getproperty(x::BigFloat, s::Symbol)
+    d = getfield(x, :d)
+    p = Base.unsafe_convert(Ptr{Limb}, d)
+    if s === :prec
+        return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_prec)
+    elseif s === :sign
+        return GC.@preserve d unsafe_load(Ptr{Cint}(p) + offset_sign)
+    elseif s === :exp
+        return GC.@preserve d unsafe_load(Ptr{Clong}(p) + offset_exp)
+    elseif s === :d
+        return BigFloatData(d)
+    else
+        return throw(FieldError(typeof(x), s))
+    end
+end
+
+@inline function Base.setproperty!(x::BigFloat, s::Symbol, v)
+    d = getfield(x, :d)
+    p = Base.unsafe_convert(Ptr{Limb}, d)
+    if s === :prec
+        return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_prec, v)
+    elseif s === :sign
+        return GC.@preserve d unsafe_store!(Ptr{Cint}(p) + offset_sign, v)
+    elseif s === :exp
+        return GC.@preserve d unsafe_store!(Ptr{Clong}(p) + offset_exp, v)
+    #elseif s === :d # not mutable
+    else
+        return throw(FieldError(x, s))
+    end
+end
+
+# Ref interface: make sure the conversion to C is done properly
+Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = error("not compatible with mpfr")
+Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat}) = error("not compatible with mpfr")
+Base.cconvert(::Type{Ref{BigFloat}}, x::BigFloat) = x.d # BigFloatData is the Ref type for BigFloat
+function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::BigFloatData)
+    d = getfield(x, :d)
+    p = Base.unsafe_convert(Ptr{Limb}, d)
+    GC.@preserve d unsafe_store!(Ptr{Ptr{Limb}}(p) + offset_d, p + offset_p, :monotonic) # :monotonic ensure that TSAN knows that this isn't a data race
+    return Ptr{BigFloat}(p)
+end
+Base.unsafe_convert(::Type{Ptr{Limb}}, fd::BigFloatData) = Base.unsafe_convert(Ptr{Limb}, getfield(fd, :d)) + offset_p
+function Base.setindex!(fd::BigFloatData, v, i)
+    d = getfield(fd, :d)
+    @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i))
+    @inbounds d[i + offset_p_limbs] = v
+    return fd
+end
+function Base.getindex(fd::BigFloatData, i)
+    d = getfield(fd, :d)
+    @boundscheck 1 <= i <= length(d) - offset_p_limbs || throw(BoundsError(fd, i))
+    @inbounds d[i + offset_p_limbs]
+end
+Base.length(fd::BigFloatData) = length(getfield(fd, :d)) - offset_p_limbs
+Base.copyto!(fd::BigFloatData, limbs) = copyto!(getfield(fd, :d), offset_p_limbs + 1, limbs) # for Random
+
+include("rawbigfloats.jl")
 
 rounding_raw(::Type{BigFloat}) = something(Base.ScopedValues.get(CURRENT_ROUNDING_MODE), ROUNDING_MODE[])
 setrounding_raw(::Type{BigFloat}, r::MPFRRoundingMode) = ROUNDING_MODE[]=r
 function setrounding_raw(f::Function, ::Type{BigFloat}, r::MPFRRoundingMode)
-    Base.@with(CURRENT_ROUNDING_MODE => r, f())
+    Base.ScopedValues.@with(CURRENT_ROUNDING_MODE => r, f())
 end
 
-
 rounding(::Type{BigFloat}) = convert(RoundingMode, rounding_raw(BigFloat))
 setrounding(::Type{BigFloat}, r::RoundingMode) = setrounding_raw(BigFloat, convert(MPFRRoundingMode, r))
 setrounding(f::Function, ::Type{BigFloat}, r::RoundingMode) =
     setrounding_raw(f, BigFloat, convert(MPFRRoundingMode, r))
 
 
-# overload the definition of unsafe_convert to ensure that `x.d` is assigned
-# it may have been dropped in the event that the BigFloat was serialized
-Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ptr{BigFloat}) = x
-@inline function Base.unsafe_convert(::Type{Ref{BigFloat}}, x::Ref{BigFloat})
-    x = x[]
-    if x.d == C_NULL
-        x.d = pointer(x._d)
-    end
-    return convert(Ptr{BigFloat}, Base.pointer_from_objref(x))
-end
-
 """
     BigFloat(x::Union{Real, AbstractString} [, rounding::RoundingMode=rounding(BigFloat)]; [precision::Integer=precision(BigFloat)])
 
@@ -283,17 +340,18 @@ function BigFloat(x::Float64, r::MPFRRoundingMode=rounding_raw(BigFloat); precis
     nlimbs = (precision + 8*Core.sizeof(Limb) - 1) ÷ (8*Core.sizeof(Limb))
 
     # Limb is a CLong which is a UInt32 on windows (thank M$) which makes this more complicated and slower.
+    zd = z.d
     if Limb === UInt64
         for i in 1:nlimbs-1
-            unsafe_store!(z.d, 0x0, i)
+            @inbounds setindex!(zd, 0x0, i)
         end
-        unsafe_store!(z.d, val, nlimbs)
+        @inbounds setindex!(zd, val, nlimbs)
     else
         for i in 1:nlimbs-2
-            unsafe_store!(z.d, 0x0, i)
+            @inbounds setindex!(zd, 0x0, i)
         end
-        unsafe_store!(z.d, val % UInt32, nlimbs-1)
-        unsafe_store!(z.d, (val >> 32) % UInt32, nlimbs)
+        @inbounds setindex!(zd, val % UInt32, nlimbs-1)
+        @inbounds setindex!(zd, (val >> 32) % UInt32, nlimbs)
     end
     z
 end
@@ -440,12 +498,12 @@ function to_ieee754(::Type{T}, x::BigFloat, rm) where {T<:AbstractFloat}
     ret_u = if is_regular & !rounds_to_inf & !rounds_to_zero
         if !exp_is_huge_p
             # significand
-            v = RawBigInt{Limb}(x._d, significand_limb_count(x))
+            v = x.d::BigFloatData
             len = max(ieee_precision + min(exp_diff, 0), 0)::Int
             signif = truncated(U, v, len) & significand_mask(T)
 
             # round up if necessary
-            rh = RawBigIntRoundingIncrementHelper(v, len)
+            rh = BigFloatDataRoundingIncrementHelper(v, len)
             incr = correct_rounding_requires_increment(rh, rm, sb)
 
             # exponent
@@ -1109,7 +1167,7 @@ Note: `nextfloat()`, `prevfloat()` do not use the precision mentioned by
     The `base` keyword requires at least Julia 1.8.
 """
 function setprecision(f::Function, ::Type{BigFloat}, prec::Integer; base::Integer=2)
-    Base.@with(CURRENT_PRECISION => _convert_precision_from_base(prec, base), f())
+    Base.ScopedValues.@with(CURRENT_PRECISION => _convert_precision_from_base(prec, base), f())
 end
 
 setprecision(f::Function, prec::Integer; base::Integer=2) = setprecision(f, BigFloat, prec; base)
@@ -1193,10 +1251,8 @@ set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, libmpfr), Cint, (Clong,
 
 function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict)
     get!(stackdict, x) do
-        # d = copy(x._d)
-        d = x._d
-        d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
-        y = _BigFloat(x.prec, x.sign, x.exp, d′)
+        d′ = copy(getfield(x, :d))
+        y = _BigFloat(d′)
         #ccall((:mpfr_custom_move,libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
         return y
     end::BigFloat
@@ -1210,7 +1266,8 @@ function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
     s.size = cld(x.prec, 8*sizeof(Limb)) # limbs
     b = s.size * sizeof(Limb)            # bytes
     ccall((:__gmpz_realloc2, libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits
-    memcpy(s.d, x.d, b)
+    xd = x.d
+    GC.@preserve xd memcpy(s.d, Base.unsafe_convert(Ptr{Limb}, xd), b)
     s, x.exp - 8b, x.sign
 end
 
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index b4625c12620e3..99f41f2404e47 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -730,6 +730,8 @@ end
 end
 @inline checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndex) =
     checkbounds_indices(Bool, inds, I.I)
+@inline checkindex(::Type{Bool}, inds::Tuple, i::AbstractRange{<:CartesianIndex}) =
+    isempty(i) | (checkindex(Bool, inds, first(i)) & checkindex(Bool, inds, last(i)))
 
 # Indexing into Array with mixtures of Integers and CartesianIndices is
 # extremely performance-sensitive. While the abstract fallbacks support this,
@@ -1669,12 +1671,11 @@ function permutedims(B::StridedArray, perm)
     permutedims!(P, B, perm)
 end
 
-function checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N}
-    indsB = axes(B)
-    length(perm) == N || throw(ArgumentError("expected permutation of size $N, but length(perm)=$(length(perm))"))
+checkdims_perm(P::AbstractArray{TP,N}, B::AbstractArray{TB,N}, perm) where {TP,TB,N} = checkdims_perm(axes(P), axes(B), perm)
+function checkdims_perm(indsP::NTuple{N, AbstractUnitRange}, indsB::NTuple{N, AbstractUnitRange}, perm) where {N}
+    length(perm) == N || throw(ArgumentError(LazyString("expected permutation of size ", N, ", but length(perm)=", length(perm))))
     isperm(perm) || throw(ArgumentError("input is not a permutation"))
-    indsP = axes(P)
-    for i = 1:length(perm)
+    for i in eachindex(perm)
         indsP[i] == indsB[perm[i]] || throw(DimensionMismatch("destination tensor of incorrect size"))
     end
     nothing
@@ -1683,7 +1684,7 @@ end
 for (V, PT, BT) in Any[((:N,), BitArray, BitArray), ((:T,:N), Array, StridedArray)]
     @eval @generated function permutedims!(P::$PT{$(V...)}, B::$BT{$(V...)}, perm) where $(V...)
         quote
-            checkdims_perm(P, B, perm)
+            checkdims_perm(axes(P), axes(B), perm)
 
             #calculates all the strides
             native_strides = size_to_strides(1, size(B)...)
diff --git a/base/namedtuple.jl b/base/namedtuple.jl
index 98192480db9dd..e316dbd37ccf5 100644
--- a/base/namedtuple.jl
+++ b/base/namedtuple.jl
@@ -424,6 +424,24 @@ function diff_fallback(a::NamedTuple, an::Tuple{Vararg{Symbol}}, bn::Tuple{Varar
     _new_NamedTuple(NamedTuple{names, types}, (A...,))
 end
 
+"""
+    delete(a::NamedTuple, field::Symbol)
+
+Construct a new named tuple from `a` by removing the named field.
+
+```jldoctest
+julia> Base.delete((a=1, b=2, c=3), :a)
+(b = 2, c = 3)
+
+julia> Base.delete((a=1, b=2, c=3), :b)
+(a = 1, c = 3)
+```
+"""
+@constprop :aggressive function delete(a::NamedTuple{an}, field::Symbol) where {an}
+    names = diff_names(an, (field,))
+    NamedTuple{names}(a)
+end
+
 """
     structdiff(a::NamedTuple, b::Union{NamedTuple,Type{NamedTuple}})
 
diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl
index 779cbf55ceaf3..0f1fdf47afed8 100644
--- a/base/opaque_closure.jl
+++ b/base/opaque_closure.jl
@@ -18,7 +18,7 @@ the argument type may be fixed length even if the function is variadic.
     This interface is experimental and subject to change or removal without notice.
 """
 macro opaque(ex)
-    esc(Expr(:opaque_closure, nothing, nothing, nothing, ex))
+    esc(Expr(:opaque_closure, nothing, nothing, nothing, #= allow_partial =# true, ex))
 end
 
 macro opaque(ty, ex)
@@ -34,7 +34,7 @@ macro opaque(ty, ex)
     end
     AT = (AT !== :_) ? AT : nothing
     RT = (RT !== :_) ? RT : nothing
-    return esc(Expr(:opaque_closure, AT, RT, RT, ex))
+    return esc(Expr(:opaque_closure, AT, RT, RT, #= allow_partial =# true, ex))
 end
 
 # OpaqueClosure construction from pre-inferred CodeInfo/IRCode
diff --git a/base/operators.jl b/base/operators.jl
index 4a9daf21c4be5..d01902e302359 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -58,12 +58,14 @@ but which do not execute the operator or return a Bool:
 """
 (<:)
 
+import Core: >:
+
 """
     >:(T1, T2)
 
 Supertype operator, equivalent to `T2 <: T1`.
 """
-(>:)(@nospecialize(a), @nospecialize(b)) = (b <: a)
+>:
 
 """
     supertype(T::Union{DataType, UnionAll})
@@ -111,9 +113,10 @@ Use [`isequal`](@ref) or [`===`](@ref) to always get a `Bool` result.
 New numeric types should implement this function for two arguments of the new type, and
 handle comparison to other types via promotion rules where possible.
 
-[`isequal`](@ref) falls back to `==`, so new methods of `==` will be used by the
-[`Dict`](@ref) type to compare keys. If your type will be used as a dictionary key, it
-should therefore also implement [`hash`](@ref).
+Equality and hashing are intimately related; two values that are considered [`isequal`](@ref) **must**
+have the same [`hash`](@ref) and by default `isequal` falls back to `==`. If a type customizes the behavior of `==` and/or [`isequal`](@ref),
+then [`hash`](@ref) must be similarly implemented to ensure `isequal` and `hash` agree. `Set`s, `Dict`s, and many other internal
+implementations assume that this invariant holds.
 
 If some type defines `==`, [`isequal`](@ref), and [`isless`](@ref) then it should
 also implement [`<`](@ref) to ensure consistency of comparisons.
@@ -345,6 +348,7 @@ true
 ===
 const ≡ = ===
 
+import Core: !==
 """
     !==(x, y)
     ≢(x,y)
@@ -362,7 +366,8 @@ julia> a ≢ a
 false
 ```
 """
-!==(@nospecialize(x), @nospecialize(y)) = !(x === y)
+!==
+
 const ≢ = !==
 
 """
@@ -1149,40 +1154,55 @@ julia> filter(!isletter, str)
 !(f::ComposedFunction{typeof(!)}) = f.inner #allows !!f === f
 
 """
-    Fix1(f, x)
+    Fix{N}(f, x)
 
-A type representing a partially-applied version of the two-argument function
-`f`, with the first argument fixed to the value "x". In other words,
-`Fix1(f, x)` behaves similarly to `y->f(x, y)`.
+A type representing a partially-applied version of a function `f`, with the argument
+`x` fixed at position `N::Int`. In other words, `Fix{3}(f, x)` behaves similarly to
+`(y1, y2, y3...; kws...) -> f(y1, y2, x, y3...; kws...)`.
 
-See also [`Fix2`](@ref Base.Fix2).
+!!! compat "Julia 1.12"
+    This general functionality requires at least Julia 1.12, while `Fix1` and `Fix2`
+    are available earlier.
+
+!!! note
+    When nesting multiple `Fix`, note that the `N` in `Fix{N}` is _relative_ to the current
+    available arguments, rather than an absolute ordering on the target function. For example,
+    `Fix{1}(Fix{2}(f, 4), 4)` fixes the first and second arg, while `Fix{2}(Fix{1}(f, 4), 4)`
+    fixes the first and third arg.
 """
-struct Fix1{F,T} <: Function
+struct Fix{N,F,T} <: Function
     f::F
     x::T
 
-    Fix1(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x)
-    Fix1(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x)
+    function Fix{N}(f::F, x) where {N,F}
+        if !(N isa Int)
+            throw(ArgumentError(LazyString("expected type parameter in `Fix` to be `Int`, but got `", N, "::", typeof(N), "`")))
+        elseif N < 1
+            throw(ArgumentError(LazyString("expected `N` in `Fix{N}` to be integer greater than 0, but got ", N)))
+        end
+        new{N,_stable_typeof(f),_stable_typeof(x)}(f, x)
+    end
+end
+
+function (f::Fix{N})(args::Vararg{Any,M}; kws...) where {N,M}
+    M < N-1 && throw(ArgumentError(LazyString("expected at least ", N-1, " arguments to `Fix{", N, "}`, but got ", M)))
+    return f.f(args[begin:begin+(N-2)]..., f.x, args[begin+(N-1):end]...; kws...)
 end
 
-(f::Fix1)(y) = f.f(f.x, y)
+# Special cases for improved constant propagation
+(f::Fix{1})(arg; kws...) = f.f(f.x, arg; kws...)
+(f::Fix{2})(arg; kws...) = f.f(arg, f.x; kws...)
 
 """
-    Fix2(f, x)
-
-A type representing a partially-applied version of the two-argument function
-`f`, with the second argument fixed to the value "x". In other words,
-`Fix2(f, x)` behaves similarly to `y->f(y, x)`.
+Alias for `Fix{1}`. See [`Fix`](@ref Base.Fix).
 """
-struct Fix2{F,T} <: Function
-    f::F
-    x::T
+const Fix1{F,T} = Fix{1,F,T}
 
-    Fix2(f::F, x) where {F} = new{F,_stable_typeof(x)}(f, x)
-    Fix2(f::Type{F}, x) where {F} = new{Type{F},_stable_typeof(x)}(f, x)
-end
+"""
+Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).
+"""
+const Fix2{F,T} = Fix{2,F,T}
 
-(f::Fix2)(y) = f.f(y, f.x)
 
 """
     isequal(x)
@@ -1319,8 +1339,7 @@ struct Splat{F} <: Function
     Splat(f) = new{Core.Typeof(f)}(f)
 end
 (s::Splat)(args) = s.f(args...)
-print(io::IO, s::Splat) = print(io, "splat(", s.f, ')')
-show(io::IO, s::Splat) = print(io, s)
+show(io::IO, s::Splat) = (print(io, "splat("); show(io, s.f); print(io, ")"))
 
 ## in and related operators
 
diff --git a/base/options.jl b/base/options.jl
index 41ce3c9e20909..f535c27d99122 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -34,6 +34,7 @@ struct JLOptions
     can_inline::Int8
     polly::Int8
     trace_compile::Ptr{UInt8}
+    trace_dispatch::Ptr{UInt8}
     fast_math::Int8
     worker::Int8
     cookie::Ptr{UInt8}
@@ -58,6 +59,7 @@ struct JLOptions
     permalloc_pkgimg::Int8
     heap_size_hint::UInt64
     trace_compile_timing::Int8
+    trim::Int8
 end
 
 # This runs early in the sysimage != is not defined yet
diff --git a/base/partr.jl b/base/partr.jl
index 8c95e3668ee74..6053a584af5ba 100644
--- a/base/partr.jl
+++ b/base/partr.jl
@@ -20,7 +20,60 @@ const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
 const heaps_lock = [SpinLock(), SpinLock()]
 
 
-cong(max::UInt32) = iszero(max) ? UInt32(0) : ccall(:jl_rand_ptls, UInt32, (UInt32,), max) + UInt32(1)
+"""
+    cong(max::UInt32)
+
+Return a random UInt32 in the range `1:max` except if max is 0, in that case return 0.
+"""
+cong(max::UInt32) = iszero(max) ? UInt32(0) : rand_ptls(max) + UInt32(1) #TODO: make sure users don't use 0 and remove this check
+
+get_ptls_rng() = ccall(:jl_get_ptls_rng, UInt64, ())
+
+set_ptls_rng(seed::UInt64) = ccall(:jl_set_ptls_rng, Cvoid, (UInt64,), seed)
+
+"""
+    rand_ptls(max::UInt32)
+
+Return a random UInt32 in the range `0:max-1` using the thread-local RNG
+state. Max must be greater than 0.
+"""
+Base.@assume_effects :removable :inaccessiblememonly :notaskstate function rand_ptls(max::UInt32)
+    rngseed = get_ptls_rng()
+    val, seed = rand_uniform_max_int32(max, rngseed)
+    set_ptls_rng(seed)
+    return val % UInt32
+end
+
+# This implementation is based on OpenSSLs implementation of rand_uniform
+# https://github.com/openssl/openssl/blob/1d2cbd9b5a126189d5e9bc78a3bdb9709427d02b/crypto/rand/rand_uniform.c#L13-L99
+# Comments are vendored from their implementation as well.
+# For the original developer check the PR to swift https://github.com/apple/swift/pull/39143.
+
+# Essentially it boils down to incrementally generating a fixed point
+# number on the interval [0, 1) and multiplying this number by the upper
+# range limit.  Once it is certain what the fractional part contributes to
+# the integral part of the product, the algorithm has produced a definitive
+# result.
+"""
+    rand_uniform_max_int32(max::UInt32, seed::UInt64)
+
+Return a random UInt32 in the range `0:max-1` using the given seed.
+Max must be greater than 0.
+"""
+Base.@assume_effects :total function rand_uniform_max_int32(max::UInt32, seed::UInt64)
+    if max == UInt32(1)
+        return UInt32(0), seed
+    end
+    # We are generating a fixed point number on the interval [0, 1).
+    # Multiplying this by the range gives us a number on [0, upper).
+    # The high word of the multiplication result represents the integral part
+    # This is not completely unbiased as it's missing the fractional part of the original implementation but it's good enough for our purposes
+    seed = UInt64(69069) * seed + UInt64(362437)
+    prod = (UInt64(max)) * (seed % UInt32) # 64 bit product
+    i = prod >> 32 % UInt32 # integral part
+    return i % UInt32, seed
+end
+
 
 
 function multiq_sift_up(heap::taskheap, idx::Int32)
diff --git a/base/path.jl b/base/path.jl
index 3b8124f34f174..69c8d22c63c54 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -613,3 +613,56 @@ relpath(path::AbstractString, startpath::AbstractString) =
 for f in (:isdirpath, :splitdir, :splitdrive, :splitext, :normpath, :abspath)
     @eval $f(path::AbstractString) = $f(String(path))
 end
+
+# RFC3986 Section 2.1
+percent_escape(s) = '%' * join(map(b -> uppercase(string(b, base=16)), codeunits(s)), '%')
+# RFC3986 Section 2.3
+encode_uri_component(s) = replace(s, r"[^A-Za-z0-9\-_.~/]+" => percent_escape)
+
+"""
+    uripath(path::AbstractString)
+
+Encode `path` as a URI as per [RFC8089: The "file" URI
+Scheme](https://www.rfc-editor.org/rfc/rfc8089), [RFC3986: Uniform Resource
+Identifier (URI): Generic Syntax](https://www.rfc-editor.org/rfc/rfc3986), and
+the [Freedesktop File URI spec](https://www.freedesktop.org/wiki/Specifications/file-uri-spec/).
+
+## Examples
+
+```julia-repl
+julia> uripath("/home/user/example file.jl") # On a unix machine
+"file://<hostname>/home/user/example%20file.jl"
+
+juila> uripath("C:\\Users\\user\\example file.jl") # On a windows machine
+"file:///C:/Users/user/example%20file.jl"
+```
+"""
+function uripath end
+
+@static if Sys.iswindows()
+    function uripath(path::String)
+        path = abspath(path)
+        if startswith(path, "\\\\") # UNC path, RFC8089 Appendix E.3
+            unixpath = join(eachsplit(path, path_separator_re, keepempty=false), '/')
+            string("file://", encode_uri_component(unixpath)) # RFC8089 Section 2
+        else
+            drive, localpath = splitdrive(path) # Assuming that non-UNC absolute paths on Windows always have a drive component
+            unixpath = join(eachsplit(localpath, path_separator_re, keepempty=false), '/')
+            encdrive = replace(encode_uri_component(drive), "%3A" => ':', "%7C" => '|') # RFC8089 Appendices D.2, E.2.1, and E.2.2
+            string("file:///", encdrive, '/', encode_uri_component(unixpath)) # RFC8089 Section 2
+        end
+    end
+else
+    function uripath(path::String)
+        localpath = join(eachsplit(abspath(path), path_separator_re, keepempty=false), '/')
+        host = if ispath("/proc/sys/fs/binfmt_misc/WSLInterop") # WSL sigil
+            distro = get(ENV, "WSL_DISTRO_NAME", "") # See <https://patrickwu.space/wslconf/>
+            "wsl\$/$distro" # See <https://github.com/microsoft/terminal/pull/14993> and <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
+        else
+            gethostname() # Freedesktop File URI Spec, Hostnames section
+        end
+        string("file://", encode_uri_component(host), '/', encode_uri_component(localpath)) # RFC8089 Section 2
+    end
+end
+
+uripath(path::AbstractString) = uripath(String(path))
diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl
index 4e77d6b13ce21..cf9748168aac2 100644
--- a/base/permuteddimsarray.jl
+++ b/base/permuteddimsarray.jl
@@ -282,7 +282,7 @@ regions.
 See also [`permutedims`](@ref).
 """
 function permutedims!(dest, src::AbstractArray, perm)
-    Base.checkdims_perm(dest, src, perm)
+    Base.checkdims_perm(axes(dest), axes(src), perm)
     P = PermutedDimsArray(dest, invperm(perm))
     _copy!(P, src)
     return dest
diff --git a/base/precompilation.jl b/base/precompilation.jl
index dfaf671a63534..b351ce67cfbad 100644
--- a/base/precompilation.jl
+++ b/base/precompilation.jl
@@ -285,7 +285,7 @@ function show_progress(io::IO, p::MiniProgressBar; termwidth=nothing, carriagere
         return
     end
     t = time()
-    if p.has_shown && (t - p.time_shown) < PROGRESS_BAR_TIME_GRANULARITY[]
+    if !p.always_reprint && p.has_shown && (t - p.time_shown) < PROGRESS_BAR_TIME_GRANULARITY[]
         return
     end
     p.time_shown = t
@@ -301,12 +301,15 @@ function show_progress(io::IO, p::MiniProgressBar; termwidth=nothing, carriagere
     max_progress_width = max(0, min(termwidth - textwidth(p.header) - textwidth(progress_text) - 10 , p.width))
     n_filled = ceil(Int, max_progress_width * perc / 100)
     n_left = max_progress_width - n_filled
+    headers = split(p.header, ' ')
     to_print = sprint(; context=io) do io
         print(io, " "^p.indent)
-        printstyled(io, p.header, color=p.color, bold=true)
-        print(io, " [")
-        print(io, "="^n_filled, ">")
-        print(io, " "^n_left, "]  ", )
+        printstyled(io, headers[1], " "; color=:green, bold=true)
+        printstyled(io, join(headers[2:end], ' '))
+        print(io, " ")
+        printstyled(io, "━"^n_filled; color=p.color)
+        printstyled(io, perc >= 95 ? "━" : "╸"; color=p.color)
+        printstyled(io, "━"^n_left, " "; color=:light_black)
         print(io, progress_text)
         carriagereturn && print(io, "\r")
     end
@@ -342,7 +345,7 @@ import Base: StaleCacheKey
 
 can_fancyprint(io::IO) = io isa Base.TTY && (get(ENV, "CI", nothing) != "true")
 
-function printpkgstyle(io, header, msg; color=:light_green)
+function printpkgstyle(io, header, msg; color=:green)
     printstyled(io, header; color, bold=true)
     println(io, " ", msg)
 end
@@ -432,51 +435,6 @@ function precompilepkgs(pkgs::Vector{String}=String[];
     # consider exts of direct deps to be direct deps so that errors are reported
     append!(direct_deps, keys(filter(d->last(d) in keys(env.project_deps), exts)))
 
-    # An extension effectively depends on another extension if it has all the the
-    # dependencies of that other extension
-    function expand_dependencies(depsmap)
-        function visit!(visited, node, all_deps)
-            if node in visited
-                return
-            end
-            push!(visited, node)
-            for dep in get(Set{Base.PkgId}, depsmap, node)
-                if !(dep in all_deps)
-                    push!(all_deps, dep)
-                    visit!(visited, dep, all_deps)
-                end
-            end
-        end
-
-        depsmap_transitive = Dict{Base.PkgId, Set{Base.PkgId}}()
-        for package in keys(depsmap)
-            # Initialize a set to keep track of all dependencies for 'package'
-            all_deps = Set{Base.PkgId}()
-            visited = Set{Base.PkgId}()
-            visit!(visited, package, all_deps)
-            # Update depsmap with the complete set of dependencies for 'package'
-            depsmap_transitive[package] = all_deps
-        end
-        return depsmap_transitive
-    end
-
-    depsmap_transitive = expand_dependencies(depsmap)
-
-    for (_, extensions_1) in pkg_exts_map
-        for extension_1 in extensions_1
-            deps_ext_1 = depsmap_transitive[extension_1]
-            for (_, extensions_2) in pkg_exts_map
-                for extension_2 in extensions_2
-                    extension_1 == extension_2 && continue
-                    deps_ext_2 = depsmap_transitive[extension_2]
-                    if issubset(deps_ext_2, deps_ext_1)
-                        push!(depsmap[extension_1], extension_2)
-                    end
-                end
-            end
-        end
-    end
-
     @debug "precompile: deps collected"
     # this loop must be run after the full depsmap has been populated
     for (pkg, pkg_exts) in pkg_exts_map
@@ -563,9 +521,6 @@ function precompilepkgs(pkgs::Vector{String}=String[];
     if !manifest
         if isempty(pkgs)
             pkgs = [pkg.name for pkg in direct_deps]
-            target = "all packages"
-        else
-            target = join(pkgs, ", ")
         end
         # restrict to dependencies of given packages
         function collect_all_deps(depsmap, dep, alldeps=Set{Base.PkgId}())
@@ -601,18 +556,16 @@ function precompilepkgs(pkgs::Vector{String}=String[];
                 return
             end
         end
-    else
-        target = "manifest"
     end
 
     nconfigs = length(configs)
+    target = nothing
     if nconfigs == 1
         if !isempty(only(configs)[1])
-            target *= " for configuration $(join(only(configs)[1], " "))"
+            target = "for configuration $(join(only(configs)[1], " "))"
         end
-        target *= "..."
     else
-        target *= " for $nconfigs compilation configurations..."
+        target = "for $nconfigs compilation configurations..."
     end
     @debug "precompile: packages filtered"
 
@@ -694,15 +647,19 @@ function precompilepkgs(pkgs::Vector{String}=String[];
         try
             wait(first_started)
             (isempty(pkg_queue) || interrupted_or_done.set) && return
-            fancyprint && lock(print_lock) do
-                printpkgstyle(io, :Precompiling, target)
-                print(io, ansi_disablecursor)
+            lock(print_lock) do
+                if target !== nothing
+                    printpkgstyle(io, :Precompiling, target)
+                end
+                if fancyprint
+                    print(io, ansi_disablecursor)
+                end
             end
             t = Timer(0; interval=1/10)
             anim_chars = ["◐","◓","◑","◒"]
             i = 1
             last_length = 0
-            bar = MiniProgressBar(; indent=2, header = "Progress", color = Base.info_color(), percentage=false, always_reprint=true)
+            bar = MiniProgressBar(; indent=0, header = "Precompiling packages ", color = :green, percentage=false, always_reprint=true)
             n_total = length(depsmap) * length(configs)
             bar.max = n_total - n_already_precomp
             final_loop = false
@@ -710,7 +667,7 @@ function precompilepkgs(pkgs::Vector{String}=String[];
             while !printloop_should_exit
                 lock(print_lock) do
                     term_size = Base.displaysize(io)::Tuple{Int,Int}
-                    num_deps_show = term_size[1] - 3
+                    num_deps_show = max(term_size[1] - 3, 2) # show at least 2 deps
                     pkg_queue_show = if !interrupted_or_done.set && length(pkg_queue) > num_deps_show
                         last(pkg_queue, num_deps_show)
                     else
@@ -831,8 +788,10 @@ function precompilepkgs(pkgs::Vector{String}=String[];
                             config_str = "$(join(flags, " "))"
                             name *= color_string(" $(config_str)", :light_black)
                         end
-                        !fancyprint && lock(print_lock) do
-                            isempty(pkg_queue) && printpkgstyle(io, :Precompiling, target)
+                        lock(print_lock) do
+                            if !fancyprint && target === nothing && isempty(pkg_queue)
+                                printpkgstyle(io, :Precompiling, "packages...")
+                            end
                         end
                         push!(pkg_queue, pkg_config)
                         started[pkg_config] = true
@@ -848,7 +807,7 @@ function precompilepkgs(pkgs::Vector{String}=String[];
                             t = @elapsed ret = precompile_pkgs_maybe_cachefile_lock(io, print_lock, fancyprint, pkg_config, pkgspidlocked, hascolor) do
                                 Base.with_logger(Base.NullLogger()) do
                                     # The false here means we ignore loaded modules, so precompile for a fresh session
-                                    Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, false; flags, cacheflags)
+                                    Base.compilecache(pkg, sourcepath, std_pipe, std_pipe, false; flags, cacheflags, isext = haskey(exts, pkg))
                                 end
                             end
                             if ret isa Base.PrecompilableError
@@ -897,6 +856,7 @@ function precompilepkgs(pkgs::Vector{String}=String[];
                     length(tasks) == 1 && notify(interrupted_or_done)
                 end
             end
+            Base.errormonitor(task) # interrupts are handled separately so ok to watch for other errors like this
             push!(tasks, task)
         end
     end
@@ -914,8 +874,12 @@ function precompilepkgs(pkgs::Vector{String}=String[];
     seconds_elapsed = round(Int, (time_ns() - time_start) / 1e9)
     ndeps = count(values(was_recompiled))
     if ndeps > 0 || !isempty(failed_deps) || (quick_exit && !isempty(std_outputs))
-        str = sprint() do iostr
+        str = sprint(context=io) do iostr
             if !quick_exit
+                if fancyprint # replace the progress bar
+                    what = isempty(requested_pkgs) ? "packages finished." : "$(join(requested_pkgs, ", ", " and ")) finished."
+                    printpkgstyle(iostr, :Precompiling, what)
+                end
                 plural = length(configs) > 1 ? "dependency configurations" : ndeps == 1 ? "dependency" : "dependencies"
                 print(iostr, "  $(ndeps) $(plural) successfully precompiled in $(seconds_elapsed) seconds")
                 if n_already_precomp > 0 || !isempty(circular_deps)
diff --git a/base/public.jl b/base/public.jl
index 460d3bc536383..2e8e777d2f91d 100644
--- a/base/public.jl
+++ b/base/public.jl
@@ -14,21 +14,15 @@ public
     AsyncCondition,
     CodeUnits,
     Event,
+    Fix,
     Fix1,
     Fix2,
     Generator,
     ImmutableDict,
     OneTo,
     LogRange,
-    AnnotatedString,
-    AnnotatedChar,
     UUID,
 
-# Annotated strings
-    annotatedstring,
-    annotate!,
-    annotations,
-
 # Semaphores
     Semaphore,
     acquire,
@@ -66,7 +60,7 @@ public
     ispublic,
     remove_linenums!,
 
-# Opperators
+# Operators
     operator_associativity,
     operator_precedence,
     isbinaryoperator,
@@ -109,7 +103,11 @@ public
     reseteof,
     link_pipe!,
 
+# filesystem operations
+    rename,
+
 # misc
     notnothing,
     runtests,
-    text_colors
+    text_colors,
+    depwarn
diff --git a/base/range.jl b/base/range.jl
index 8b30222382c9a..4b5d076dcf436 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -1485,7 +1485,7 @@ end
 """
     mod(x::Integer, r::AbstractUnitRange)
 
-Find `y` in the range `r` such that ``x ≡ y (mod n)``, where `n = length(r)`,
+Find `y` in the range `r` such that `x` ≡ `y` (mod `n`), where `n = length(r)`,
 i.e. `y = mod(x - first(r), n) + first(r)`.
 
 See also [`mod1`](@ref).
diff --git a/base/rational.jl b/base/rational.jl
index fb1824acb6b31..b4e450fd73abc 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -293,8 +293,14 @@ julia> numerator(4)
 4
 ```
 """
-numerator(x::Integer) = x
+numerator(x::Union{Integer,Complex{<:Integer}}) = x
 numerator(x::Rational) = x.num
+function numerator(z::Complex{<:Rational})
+    den = denominator(z)
+    reim = (real(z), imag(z))
+    result = checked_mul.(numerator.(reim), div.(den, denominator.(reim)))
+    complex(result...)
+end
 
 """
     denominator(x)
@@ -310,8 +316,9 @@ julia> denominator(4)
 1
 ```
 """
-denominator(x::Integer) = one(x)
+denominator(x::Union{Integer,Complex{<:Integer}}) = one(x)
 denominator(x::Rational) = x.den
+denominator(z::Complex{<:Rational}) = lcm(denominator(real(z)), denominator(imag(z)))
 
 sign(x::Rational) = oftype(x, sign(x.num))
 signbit(x::Rational) = signbit(x.num)
diff --git a/base/rawbigints.jl b/base/rawbigfloats.jl
similarity index 54%
rename from base/rawbigints.jl
rename to base/rawbigfloats.jl
index 6508bea05be0f..4377edfc463d8 100644
--- a/base/rawbigints.jl
+++ b/base/rawbigfloats.jl
@@ -1,34 +1,21 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-"""
-Segment of raw words of bits interpreted as a big integer. Less
-significant words come first. Each word is in machine-native bit-order.
-"""
-struct RawBigInt{T<:Unsigned}
-    d::String
-    word_count::Int
-
-    function RawBigInt{T}(d::String, word_count::Int) where {T<:Unsigned}
-        new{T}(d, word_count)
-    end
-end
+# Some operations on BigFloat can be done more directly by treating the data portion ("BigFloatData") as a BigInt
 
-elem_count(x::RawBigInt, ::Val{:words}) = x.word_count
+elem_count(x::BigFloatData, ::Val{:words}) = length(x)
 elem_count(x::Unsigned, ::Val{:bits}) = sizeof(x) * 8
-word_length(::RawBigInt{T}) where {T} = elem_count(zero(T), Val(:bits))
-elem_count(x::RawBigInt{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words))
+word_length(::BigFloatData{T}) where {T} = elem_count(zero(T), Val(:bits))
+elem_count(x::BigFloatData{T}, ::Val{:bits}) where {T} = word_length(x) * elem_count(x, Val(:words))
 reversed_index(n::Int, i::Int) = n - i - 1
 reversed_index(x, i::Int, v::Val) = reversed_index(elem_count(x, v), i)::Int
-split_bit_index(x::RawBigInt, i::Int) = divrem(i, word_length(x), RoundToZero)
+split_bit_index(x::BigFloatData, i::Int) = divrem(i, word_length(x), RoundToZero)
 
 """
 `i` is the zero-based index of the wanted word in `x`, starting from
 the less significant words.
 """
-function get_elem(x::RawBigInt{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T}
-    # `i` must be non-negative and less than `x.word_count`
-    d = x.d
-    (GC.@preserve d unsafe_load(Ptr{T}(pointer(d)), i + 1))::T
+function get_elem(x::BigFloatData{T}, i::Int, ::Val{:words}, ::Val{:ascending}) where {T}
+    @inbounds return x[i + 1]::T
 end
 
 function get_elem(x, i::Int, v::Val, ::Val{:descending})
@@ -36,9 +23,9 @@ function get_elem(x, i::Int, v::Val, ::Val{:descending})
     get_elem(x, j, v, Val(:ascending))
 end
 
-word_is_nonzero(x::RawBigInt, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v))
+word_is_nonzero(x::BigFloatData, i::Int, v::Val) = !iszero(get_elem(x, i, Val(:words), v))
 
-word_is_nonzero(x::RawBigInt, v::Val) = let x = x
+word_is_nonzero(x::BigFloatData, v::Val) = let x = x
     i -> word_is_nonzero(x, i, v)
 end
 
@@ -46,7 +33,7 @@ end
 Returns a `Bool` indicating whether the `len` least significant words
 of `x` are nonzero.
 """
-function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:words})
+function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:words})
     any(word_is_nonzero(x, Val(:ascending)), 0:(len - 1))
 end
 
@@ -54,7 +41,7 @@ end
 Returns a `Bool` indicating whether the `len` least significant bits of
 the `i`-th (zero-based index) word of `x` are nonzero.
 """
-function tail_is_nonzero(x::RawBigInt, len::Int, i::Int, ::Val{:word})
+function tail_is_nonzero(x::BigFloatData, len::Int, i::Int, ::Val{:word})
     !iszero(len) &&
     !iszero(get_elem(x, i, Val(:words), Val(:ascending)) << (word_length(x) - len))
 end
@@ -63,7 +50,7 @@ end
 Returns a `Bool` indicating whether the `len` least significant bits of
 `x` are nonzero.
 """
-function tail_is_nonzero(x::RawBigInt, len::Int, ::Val{:bits})
+function tail_is_nonzero(x::BigFloatData, len::Int, ::Val{:bits})
     if 0 < len
         word_count, bit_count_in_word = split_bit_index(x, len)
         tail_is_nonzero(x, bit_count_in_word, word_count, Val(:word)) ||
@@ -83,7 +70,7 @@ end
 """
 Returns a `Bool` that is the `i`-th (zero-based index) bit of `x`.
 """
-function get_elem(x::RawBigInt, i::Int, ::Val{:bits}, v::Val{:ascending})
+function get_elem(x::BigFloatData, i::Int, ::Val{:bits}, v::Val{:ascending})
     vb = Val(:bits)
     if 0 ≤ i < elem_count(x, vb)
         word_index, bit_index_in_word = split_bit_index(x, i)
@@ -96,38 +83,44 @@ end
 
 """
 Returns an integer of type `R`, consisting of the `len` most
-significant bits of `x`.
+significant bits of `x`. If there are less than `len` bits in `x`,
+the least significant bits are zeroed.
 """
-function truncated(::Type{R}, x::RawBigInt, len::Int) where {R<:Integer}
+function truncated(::Type{R}, x::BigFloatData, len::Int) where {R<:Integer}
     ret = zero(R)
     if 0 < len
         word_count, bit_count_in_word = split_bit_index(x, len)
         k = word_length(x)
         vals = (Val(:words), Val(:descending))
+        lenx = elem_count(x, first(vals))
 
         for w ∈ 0:(word_count - 1)
             ret <<= k
-            word = get_elem(x, w, vals...)
-            ret |= R(word)
+            if w < lenx # if the output type is larger, truncate turns into zero-extend
+                word = get_elem(x, w, vals...)
+                ret |= R(word)
+            end
         end
 
         if !iszero(bit_count_in_word)
             ret <<= bit_count_in_word
-            wrd = get_elem(x, word_count, vals...)
-            ret |= R(wrd >>> (k - bit_count_in_word))
+            if word_count < lenx # if the output type is larger, truncate turns into zero-extend
+                wrd = get_elem(x, word_count, vals...)
+                ret |= R(wrd >>> (k - bit_count_in_word))
+            end
         end
     end
     ret::R
 end
 
-struct RawBigIntRoundingIncrementHelper{T<:Unsigned}
-    n::RawBigInt{T}
+struct BigFloatDataRoundingIncrementHelper{T<:Unsigned}
+    n::BigFloatData{T}
     trunc_len::Int
 
     final_bit::Bool
     round_bit::Bool
 
-    function RawBigIntRoundingIncrementHelper{T}(n::RawBigInt{T}, len::Int) where {T<:Unsigned}
+    function BigFloatDataRoundingIncrementHelper{T}(n::BigFloatData{T}, len::Int) where {T<:Unsigned}
         vals = (Val(:bits), Val(:descending))
         f = get_elem(n, len - 1, vals...)
         r = get_elem(n, len    , vals...)
@@ -135,15 +128,15 @@ struct RawBigIntRoundingIncrementHelper{T<:Unsigned}
     end
 end
 
-function RawBigIntRoundingIncrementHelper(n::RawBigInt{T}, len::Int) where {T<:Unsigned}
-    RawBigIntRoundingIncrementHelper{T}(n, len)
+function BigFloatDataRoundingIncrementHelper(n::BigFloatData{T}, len::Int) where {T<:Unsigned}
+    BigFloatDataRoundingIncrementHelper{T}(n, len)
 end
 
-(h::RawBigIntRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit
+(h::BigFloatDataRoundingIncrementHelper)(::Rounding.FinalBit) = h.final_bit
 
-(h::RawBigIntRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit
+(h::BigFloatDataRoundingIncrementHelper)(::Rounding.RoundBit) = h.round_bit
 
-function (h::RawBigIntRoundingIncrementHelper)(::Rounding.StickyBit)
+function (h::BigFloatDataRoundingIncrementHelper)(::Rounding.StickyBit)
     v = Val(:bits)
     n = h.n
     tail_is_nonzero(n, elem_count(n, v) - h.trunc_len - 1, v)
diff --git a/base/reduce.jl b/base/reduce.jl
index bbfd66e5686ed..0c37256b64fb5 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -638,11 +638,11 @@ function mapreduce_impl(f, op::Union{typeof(max), typeof(min)},
     start = first + 1
     simdstop  = start + chunk_len - 4
     while simdstop <= last - 3
-        @inbounds for i in start:4:simdstop
-            v1 = _fast(op, v1, f(A[i+0]))
-            v2 = _fast(op, v2, f(A[i+1]))
-            v3 = _fast(op, v3, f(A[i+2]))
-            v4 = _fast(op, v4, f(A[i+3]))
+        for i in start:4:simdstop
+            v1 = _fast(op, v1, f(@inbounds(A[i+0])))
+            v2 = _fast(op, v2, f(@inbounds(A[i+1])))
+            v3 = _fast(op, v3, f(@inbounds(A[i+2])))
+            v4 = _fast(op, v4, f(@inbounds(A[i+3])))
         end
         checkbounds(A, simdstop+3)
         start += chunk_len
diff --git a/base/reducedim.jl b/base/reducedim.jl
index 1d74fb0d498a5..0478afe1a46b6 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -196,11 +196,8 @@ end
 
 ## generic (map)reduction
 
-has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = false
-has_fast_linear_indexing(a::Array) = true
-has_fast_linear_indexing(::Union{Number,Ref,AbstractChar}) = true  # 0d objects, for Broadcasted
-has_fast_linear_indexing(bc::Broadcast.Broadcasted) =
-    all(has_fast_linear_indexing, bc.args)
+has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = IndexStyle(a) === IndexLinear()
+has_fast_linear_indexing(a::AbstractVector) = true
 
 function check_reducedims(R, A)
     # Check whether R has compatible dimensions w.r.t. A for reduction
@@ -261,8 +258,9 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
         # use mapreduce_impl, which is probably better tuned to achieve higher performance
         nslices = div(length(A), lsiz)
         ibase = first(LinearIndices(A))-1
-        for i = 1:nslices
-            @inbounds R[i] = op(R[i], mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
+        for i in eachindex(R)
+            r = op(@inbounds(R[i]), mapreduce_impl(f, op, A, ibase+1, ibase+lsiz))
+            @inbounds R[i] = r
             ibase += lsiz
         end
         return R
@@ -272,19 +270,20 @@ function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArrayOrBroadcasted)
     if reducedim1(R, A)
         # keep the accumulator as a local variable when reducing along the first dimension
         i1 = first(axes1(R))
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
-            r = R[i1,IR]
+            @inbounds r = R[i1,IR]
             @simd for i in axes(A, 1)
-                r = op(r, f(A[i, IA]))
+                r = op(r, f(@inbounds(A[i, IA])))
             end
-            R[i1,IR] = r
+            @inbounds R[i1,IR] = r
         end
     else
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
             @simd for i in axes(A, 1)
-                R[i,IR] = op(R[i,IR], f(A[i,IA]))
+                v = op(@inbounds(R[i,IR]), f(@inbounds(A[i,IA])))
+                @inbounds R[i,IR] = v
             end
         end
     end
@@ -1028,33 +1027,33 @@ function findminmax!(f, op, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
     zi = zero(eltype(ks))
     if reducedim1(Rval, A)
         i1 = first(axes1(Rval))
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
-            tmpRv = Rval[i1,IR]
-            tmpRi = Rind[i1,IR]
+            @inbounds tmpRv = Rval[i1,IR]
+            @inbounds tmpRi = Rind[i1,IR]
             for i in axes(A,1)
                 k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
+                tmpAv = f(@inbounds(A[i,IA]))
                 if tmpRi == zi || op(tmpRv, tmpAv)
                     tmpRv = tmpAv
                     tmpRi = k
                 end
                 y = iterate(ks, kss)
             end
-            Rval[i1,IR] = tmpRv
-            Rind[i1,IR] = tmpRi
+            @inbounds Rval[i1,IR] = tmpRv
+            @inbounds Rind[i1,IR] = tmpRi
         end
     else
-        @inbounds for IA in CartesianIndices(indsAt)
+        for IA in CartesianIndices(indsAt)
             IR = Broadcast.newindex(IA, keep, Idefault)
             for i in axes(A, 1)
                 k, kss = y::Tuple
-                tmpAv = f(A[i,IA])
-                tmpRv = Rval[i,IR]
-                tmpRi = Rind[i,IR]
+                tmpAv = f(@inbounds(A[i,IA]))
+                @inbounds tmpRv = Rval[i,IR]
+                @inbounds tmpRi = Rind[i,IR]
                 if tmpRi == zi || op(tmpRv, tmpAv)
-                    Rval[i,IR] = tmpAv
-                    Rind[i,IR] = k
+                    @inbounds Rval[i,IR] = tmpAv
+                    @inbounds Rind[i,IR] = k
                 end
                 y = iterate(ks, kss)
             end
diff --git a/base/reflection.jl b/base/reflection.jl
index 0af40a2e481a0..80eeb4c4efb12 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -76,6 +76,17 @@ function fullname(m::Module)
     return (fullname(mp)..., mn)
 end
 
+"""
+    moduleloc(m::Module) -> LineNumberNode
+
+Get the location of the `module` definition.
+"""
+function moduleloc(m::Module)
+    line = Ref{Int32}(0)
+    file = ccall(:jl_module_getloc, Ref{Symbol}, (Any, Ref{Int32}), m, line)
+    return LineNumberNode(Int(line[]), file)
+end
+
 """
     names(x::Module; all::Bool=false, imported::Bool=false, usings::Bool=false) -> Vector{Symbol}
 
@@ -207,11 +218,34 @@ function _fieldnames(@nospecialize t)
     return t.name.names
 end
 
+const BINDING_KIND_GLOBAL       = 0x0
+const BINDING_KIND_CONST        = 0x1
+const BINDING_KIND_CONST_IMPORT = 0x2
+const BINDING_KIND_IMPLICIT     = 0x3
+const BINDING_KIND_EXPLICIT     = 0x4
+const BINDING_KIND_IMPORTED     = 0x5
+const BINDING_KIND_FAILED       = 0x6
+const BINDING_KIND_DECLARED     = 0x7
+const BINDING_KIND_GUARD        = 0x8
+
+function lookup_binding_partition(world::UInt, b::Core.Binding)
+    ccall(:jl_get_binding_partition, Ref{Core.BindingPartition}, (Any, UInt), b, world)
+end
+
+function lookup_binding_partition(world::UInt, gr::Core.GlobalRef)
+    ccall(:jl_get_globalref_partition, Ref{Core.BindingPartition}, (Any, UInt), gr, world)
+end
+
+binding_kind(bpart::Core.BindingPartition) = ccall(:jl_bpart_get_kind, UInt8, (Any,), bpart)
+binding_kind(m::Module, s::Symbol) = binding_kind(lookup_binding_partition(tls_world_age(), GlobalRef(m, s)))
+
 """
     fieldname(x::DataType, i::Integer)
 
 Get the name of field `i` of a `DataType`.
 
+The return type is `Symbol`, except when `x <: Tuple`, in which case the index of the field is returned, of type `Int`.
+
 # Examples
 ```jldoctest
 julia> fieldname(Rational, 1)
@@ -219,6 +253,9 @@ julia> fieldname(Rational, 1)
 
 julia> fieldname(Rational, 2)
 :den
+
+julia> fieldname(Tuple{String,Int}, 2)
+2
 ```
 """
 function fieldname(t::DataType, i::Integer)
@@ -246,6 +283,9 @@ fieldname(t::Type{<:Tuple}, i::Integer) =
 
 Get a tuple with the names of the fields of a `DataType`.
 
+Each name is a `Symbol`, except when `x <: Tuple`, in which case each name (actually the
+index of the field) is an `Int`.
+
 See also [`propertynames`](@ref), [`hasfield`](@ref).
 
 # Examples
@@ -255,6 +295,9 @@ julia> fieldnames(Rational)
 
 julia> fieldnames(typeof(1+im))
 (:re, :im)
+
+julia> fieldnames(Tuple{String,Int})
+(1, 2)
 ```
 """
 fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough
@@ -932,7 +975,7 @@ use it in the following manner to summarize information about a struct:
 julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)];
 
 julia> structinfo(Base.Filesystem.StatStruct)
-13-element Vector{Tuple{UInt64, Symbol, Type}}:
+14-element Vector{Tuple{UInt64, Symbol, Type}}:
  (0x0000000000000000, :desc, Union{RawFD, String})
  (0x0000000000000008, :device, UInt64)
  (0x0000000000000010, :inode, UInt64)
@@ -946,6 +989,7 @@ julia> structinfo(Base.Filesystem.StatStruct)
  (0x0000000000000050, :blocks, Int64)
  (0x0000000000000058, :mtime, Float64)
  (0x0000000000000060, :ctime, Float64)
+ (0x0000000000000068, :ioerrno, Int32)
 ```
 """
 fieldoffset(x::DataType, idx::Integer) = (@_foldable_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx))
@@ -985,7 +1029,7 @@ julia> struct Foo
        end
 
 julia> Base.fieldindex(Foo, :z)
-ERROR: FieldError: type Foo has no field z
+ERROR: FieldError: type Foo has no field `z`, available fields: `x`, `y`
 Stacktrace:
 [...]
 
@@ -1188,11 +1232,17 @@ hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method)
 
 # low-level method lookup functions used by the compiler
 
-unionlen(x::Union) = unionlen(x.a) + unionlen(x.b)
-unionlen(@nospecialize(x)) = 1
+unionlen(@nospecialize(x)) = x isa Union ? unionlen(x.a) + unionlen(x.b) : 1
 
-_uniontypes(x::Union, ts) = (_uniontypes(x.a,ts); _uniontypes(x.b,ts); ts)
-_uniontypes(@nospecialize(x), ts) = (push!(ts, x); ts)
+function _uniontypes(@nospecialize(x), ts::Array{Any,1})
+    if x isa Union
+        _uniontypes(x.a, ts)
+        _uniontypes(x.b, ts)
+    else
+        push!(ts, x)
+    end
+    return ts
+end
 uniontypes(@nospecialize(x)) = _uniontypes(x, Any[])
 
 function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt)
@@ -1466,6 +1516,13 @@ struct CodegenParams
     """
     use_jlplt::Cint
 
+    """
+    If enabled, only provably reachable code (from functions marked with `entrypoint`) is included
+    in the output system image. Errors or warnings can be given for call sites too dynamic to handle.
+    The option is disabled by default. (0=>disabled, 1=>safe (static errors), 2=>unsafe, 3=>unsafe plus warnings)
+    """
+    trim::Cint
+
     """
     A pointer of type
 
@@ -1481,14 +1538,14 @@ struct CodegenParams
                    prefer_specsig::Bool=false,
                    gnu_pubnames::Bool=true, debug_info_kind::Cint = default_debug_info_kind(),
                    debug_info_level::Cint = Cint(JLOptions().debug_level), safepoint_on_entry::Bool=true,
-                   gcstack_arg::Bool=true, use_jlplt::Bool=true,
+                   gcstack_arg::Bool=true, use_jlplt::Bool=true, trim::Cint=Cint(0),
                    lookup::Ptr{Cvoid}=unsafe_load(cglobal(:jl_rettype_inferred_addr, Ptr{Cvoid})))
         return new(
             Cint(track_allocations), Cint(code_coverage),
             Cint(prefer_specsig),
             Cint(gnu_pubnames), debug_info_kind,
             debug_info_level, Cint(safepoint_on_entry),
-            Cint(gcstack_arg), Cint(use_jlplt),
+            Cint(gcstack_arg), Cint(use_jlplt), Cint(trim),
             lookup)
     end
 end
@@ -2402,7 +2459,7 @@ true
 ```
 """
 function hasmethod(@nospecialize(f), @nospecialize(t))
-    return Core._hasmethod(f, t isa Type ? t : to_tuple_type(t))
+    return Core._hasmethod(signature_type(f, t))
 end
 
 function Core.kwcall(kwargs::NamedTuple, ::typeof(hasmethod), @nospecialize(f), @nospecialize(t))
@@ -2425,7 +2482,7 @@ function hasmethod(f, t, kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_c
     for kw in kws
         endswith(String(kw), "...") && return true
     end
-    kwnames = Symbol[kwnames[i] for i in 1:length(kwnames)]
+    kwnames = collect(kwnames)
     return issubset(kwnames, kws)
 end
 
diff --git a/base/refvalue.jl b/base/refvalue.jl
index 000088ff0ce76..7a0f2f84e2206 100644
--- a/base/refvalue.jl
+++ b/base/refvalue.jl
@@ -46,9 +46,9 @@ function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T})
         # Instead, explicitly load the pointer from the `RefValue`,
         # which also ensures this returns same pointer as the one rooted in the `RefValue` object.
         p = atomic_pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), :monotonic)
-    end
-    if p == C_NULL
-        throw(UndefRefError())
+        if p == C_NULL
+            throw(UndefRefError())
+        end
     end
     return p
 end
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index d74a043293a3a..d31f3ebb5dd2d 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -373,6 +373,7 @@ has_offset_axes(a::ReinterpretArray) = has_offset_axes(a.parent)
 
 elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T)
 cconvert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = cconvert(Ptr{S}, a.parent)
+unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent))
 
 @propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S}
     if isprimitivetype(T) && isprimitivetype(S)
diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl
index 4173ef1d3f598..019f1d30a25c2 100644
--- a/base/reshapedarray.jl
+++ b/base/reshapedarray.jl
@@ -324,6 +324,7 @@ setindex!(A::ReshapedRange, val, index::ReshapedIndex) = _rs_setindex!_err()
 @noinline _rs_setindex!_err() = error("indexed assignment fails for a reshaped range; consider calling collect")
 
 cconvert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = cconvert(Ptr{T}, parent(a))
+unsafe_convert(::Type{Ptr{T}}, a::ReshapedArray{T}) where {T} = unsafe_convert(Ptr{T}, a.parent)
 
 # Add a few handy specializations to further speed up views of reshaped ranges
 const ReshapedUnitRange{T,N,A<:AbstractUnitRange} = ReshapedArray{T,N,A,Tuple{}}
diff --git a/base/scopedvalues.jl b/base/scopedvalues.jl
index 6ccd4687c5c65..39e3c2c076718 100644
--- a/base/scopedvalues.jl
+++ b/base/scopedvalues.jl
@@ -85,6 +85,8 @@ struct Scope
     values::ScopeStorage
 end
 
+Scope(scope::Scope) = scope
+
 function Scope(parent::Union{Nothing, Scope}, key::ScopedValue{T}, value) where T
     val = convert(T, value)
     if parent === nothing
diff --git a/base/show.jl b/base/show.jl
index 8db2826a2088c..ec6776d81f2d5 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -514,24 +514,16 @@ function _show_default(io::IO, @nospecialize(x))
 end
 
 function active_module()
-    isassigned(REPL_MODULE_REF) || return Main
-    REPL = REPL_MODULE_REF[]
-    return invokelatest(REPL.active_module)::Module
+    if ccall(:jl_is_in_pure_context, Bool, ())
+        error("active_module() should not be called from a pure context")
+    end
+    if !@isdefined(active_repl) || active_repl === nothing
+        return Main
+    end
+    return invokelatest(active_module, active_repl)::Module
 end
 
-# Check if a particular symbol is exported from a standard library module
-function is_exported_from_stdlib(name::Symbol, mod::Module)
-    !isdefined(mod, name) && return false
-    orig = getfield(mod, name)
-    while !(mod === Base || mod === Core)
-        activemod = active_module()
-        parent = parentmodule(mod)
-        if mod === activemod || mod === parent || parent === activemod
-            return false
-        end
-        mod = parent
-    end
-    return isexported(mod, name) && isdefined(mod, name) && !isdeprecated(mod, name) && getfield(mod, name) === orig
+module UsesCoreAndBaseOnly
 end
 
 function show_function(io::IO, f::Function, compact::Bool, fallback::Function)
@@ -544,13 +536,13 @@ function show_function(io::IO, f::Function, compact::Bool, fallback::Function)
         print(io, mt.name)
     elseif isdefined(mt, :module) && isdefined(mt.module, mt.name) &&
         getfield(mt.module, mt.name) === f
-        mod = active_module()
-        if is_exported_from_stdlib(mt.name, mt.module) || mt.module === mod
-            show_sym(io, mt.name)
-        else
+        # this used to call the removed internal function `is_exported_from_stdlib`, which effectively
+        # just checked for exports from Core and Base.
+        mod = get(io, :module, UsesCoreAndBaseOnly)
+        if !(isvisible(mt.name, mt.module, mod) || mt.module === mod)
             print(io, mt.module, ".")
-            show_sym(io, mt.name)
         end
+        show_sym(io, mt.name)
     else
         fallback(io, f)
     end
@@ -681,7 +673,7 @@ function show_can_elide(p::TypeVar, wheres::Vector, elide::Int, env::SimpleVecto
         has_typevar(v.lb, p) && return false
         has_typevar(v.ub, p) && return false
     end
-    for i = 1:length(env)
+    for i = eachindex(env)
         i == skip && continue
         has_typevar(env[i], p) && return false
     end
@@ -737,9 +729,9 @@ end
 function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector, wheres::Vector)
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless alias is visible from module passed to
-        # IOContext. If :module is not set, default to Main (or current active module).
+        # IOContext. If :module is not set, default to Main.
         # nothing can be used to force printing prefix.
-        from = get(io, :module, active_module())
+        from = get(io, :module, Main)
         if (from === nothing || !isvisible(name.name, name.mod, from))
             show(io, name.mod)
             print(io, ".")
@@ -1053,9 +1045,9 @@ function show_type_name(io::IO, tn::Core.TypeName)
     quo = false
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless type is visible from module passed to
-        # IOContext If :module is not set, default to Main (or current active module).
+        # IOContext If :module is not set, default to Main.
         # nothing can be used to force printing prefix
-        from = get(io, :module, active_module())
+        from = get(io, :module, Main)
         if isdefined(tn, :module) && (from === nothing || !isvisible(sym, tn.module, from::Module))
             show(io, tn.module)
             print(io, ".")
@@ -1188,7 +1180,7 @@ end
 
 function show_at_namedtuple(io::IO, syms::Tuple, types::DataType)
     first = true
-    for i in 1:length(syms)
+    for i in eachindex(syms)
         if !first
             print(io, ", ")
         end
@@ -1407,11 +1399,11 @@ function show_delim_array(io::IO, itr::Union{AbstractArray,SimpleVector}, op, de
                     x = itr[i]
                     show(recur_io, x)
                 end
-                i += 1
-                if i > l
+                if i == l
                     delim_one && first && print(io, delim)
                     break
                 end
+                i += 1
                 first = false
                 print(io, delim)
                 print(io, ' ')
@@ -2204,8 +2196,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     elseif head === :do && nargs == 2
         iob = IOContext(io, beginsym=>false)
         show_unquoted(iob, args[1], indent, -1, quote_level)
-        print(io, " do ")
-        show_list(iob, (((args[2]::Expr).args[1])::Expr).args, ", ", 0, 0, quote_level)
+        print(io, " do")
+        do_args = (((args[2]::Expr).args[1])::Expr).args
+        if !isempty(do_args)
+            print(io, ' ')
+            show_list(iob, do_args, ", ", 0, 0, quote_level)
+        end
         for stmt in (((args[2]::Expr).args[2])::Expr).args
             print(io, '\n', " "^(indent + indent_width))
             show_unquoted(iob, stmt, indent + indent_width, -1, quote_level)
@@ -2382,7 +2378,7 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
         if get(io, beginsym, false)
             print(io, '(')
             ind = indent + indent_width
-            for i = 1:length(ex.args)
+            for i = eachindex(ex.args)
                 if i > 1
                     # if there was only a comment before the first semicolon, the expression would get parsed as a NamedTuple
                     if !(i == 2 && ex.args[1] isa LineNumberNode)
@@ -2535,7 +2531,7 @@ function show_signature_function(io::IO, @nospecialize(ft), demangle=false, farg
     uw = unwrap_unionall(ft)
     if ft <: Function && isa(uw, DataType) && isempty(uw.parameters) && _isself(uw)
         uwmod = parentmodule(uw)
-        if qualified && !is_exported_from_stdlib(uw.name.mt.name, uwmod) && uwmod !== Main
+        if qualified && !isexported(uwmod, uw.name.mt.name) && uwmod !== Main
             print_within_stacktrace(io, uwmod, '.', bold=true)
         end
         s = sprint(show_sym, (demangle ? demangle_function_name : identity)(uw.name.mt.name), context=io)
@@ -2904,7 +2900,7 @@ function dump(io::IOContext, x::SimpleVector, n::Int, indent)
     end
     print(io, "SimpleVector")
     if n > 0
-        for i = 1:length(x)
+        for i in eachindex(x)
             println(io)
             print(io, indent, "  ", i, ": ")
             if isassigned(x,i)
@@ -3021,7 +3017,7 @@ function dump(io::IOContext, x::DataType, n::Int, indent)
         end
         fields = fieldnames(x)
         fieldtypes = datatype_fieldtypes(x)
-        for idx in 1:length(fields)
+        for idx in eachindex(fields)
             println(io)
             print(io, indent, "  ")
             is_mut && isconst(x, idx) && print(io, "const ")
diff --git a/base/stat.jl b/base/stat.jl
index b17fd54cb1d64..c6fb239a96404 100644
--- a/base/stat.jl
+++ b/base/stat.jl
@@ -63,6 +63,7 @@ struct StatStruct
     blocks  :: Int64
     mtime   :: Float64
     ctime   :: Float64
+    ioerrno :: Int32
 end
 
 @eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash
@@ -80,22 +81,23 @@ end
     end)
 end
 
-StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
-StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct("", buf)
-StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
+StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Base.UV_ENOENT)
+StatStruct(buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct("", buf, ioerrno)
+StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Memory{UInt8},Vector{UInt8},Ptr{UInt8}}, ioerrno::Int32) = StatStruct(
     desc isa OS_HANDLE ? desc : String(desc),
-    ccall(:jl_stat_dev,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_ino,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_mode,    UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_nlink,   UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_uid,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_gid,     UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_rdev,    UInt32,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_size,    UInt64,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_blksize, UInt64,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_blocks,  UInt64,  (Ptr{UInt8},), buf),
-    ccall(:jl_stat_mtime,   Float64, (Ptr{UInt8},), buf),
-    ccall(:jl_stat_ctime,   Float64, (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_dev,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_ino,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_mode,    UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_nlink,   UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_uid,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_gid,     UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt32) : ccall(:jl_stat_rdev,    UInt32,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_size,    UInt64,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blksize, UInt64,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(UInt64) : ccall(:jl_stat_blocks,  UInt64,  (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_mtime,   Float64, (Ptr{UInt8},), buf),
+    ioerrno != 0 ? zero(Float64) : ccall(:jl_stat_ctime,   Float64, (Ptr{UInt8},), buf),
+    ioerrno
 )
 
 function iso_datetime_with_relative(t, tnow)
@@ -130,35 +132,41 @@ end
 function show_statstruct(io::IO, st::StatStruct, oneline::Bool)
     print(io, oneline ? "StatStruct(" : "StatStruct for ")
     show(io, st.desc)
-    oneline || print(io, "\n  ")
-    print(io, " size: ", st.size, " bytes")
-    oneline || print(io, "\n")
-    print(io, " device: ", st.device)
-    oneline || print(io, "\n ")
-    print(io, " inode: ", st.inode)
-    oneline || print(io, "\n  ")
-    print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
-    oneline || print(io, "\n ")
-    print(io, " nlink: ", st.nlink)
-    oneline || print(io, "\n   ")
-    print(io, " uid: $(st.uid)")
-    username = getusername(st.uid)
-    username === nothing || print(io, " (", username, ")")
-    oneline || print(io, "\n   ")
-    print(io, " gid: ", st.gid)
-    groupname = getgroupname(st.gid)
-    groupname === nothing || print(io, " (", groupname, ")")
-    oneline || print(io, "\n  ")
-    print(io, " rdev: ", st.rdev)
-    oneline || print(io, "\n ")
-    print(io, " blksz: ", st.blksize)
-    oneline || print(io, "\n")
-    print(io, " blocks: ", st.blocks)
-    tnow = round(UInt, time())
-    oneline || print(io, "\n ")
-    print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
-    oneline || print(io, "\n ")
-    print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+    code = st.ioerrno
+    if code != 0
+        print(io, oneline ? " " : "\n ")
+        print(io, Base.uverrorname(code), ": ", Base.struverror(code))
+    else
+        oneline || print(io, "\n  ")
+        print(io, " size: ", st.size, " bytes")
+        oneline || print(io, "\n")
+        print(io, " device: ", st.device)
+        oneline || print(io, "\n ")
+        print(io, " inode: ", st.inode)
+        oneline || print(io, "\n  ")
+        print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
+        oneline || print(io, "\n ")
+        print(io, " nlink: ", st.nlink)
+        oneline || print(io, "\n   ")
+        print(io, " uid: $(st.uid)")
+        username = getusername(st.uid)
+        username === nothing || print(io, " (", username, ")")
+        oneline || print(io, "\n   ")
+        print(io, " gid: ", st.gid)
+        groupname = getgroupname(st.gid)
+        groupname === nothing || print(io, " (", groupname, ")")
+        oneline || print(io, "\n  ")
+        print(io, " rdev: ", st.rdev)
+        oneline || print(io, "\n ")
+        print(io, " blksz: ", st.blksize)
+        oneline || print(io, "\n")
+        print(io, " blocks: ", st.blocks)
+        tnow = round(UInt, time())
+        oneline || print(io, "\n ")
+        print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
+        oneline || print(io, "\n ")
+        print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+    end
     oneline && print(io, ")")
     return nothing
 end
@@ -168,31 +176,32 @@ show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false
 
 # stat & lstat functions
 
+checkstat(s::StatStruct) = Int(s.ioerrno) in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL) ? s : uv_error(string("stat(", repr(s.desc), ")"), s.ioerrno)
+
 macro stat_call(sym, arg1type, arg)
     return quote
-        stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ())))
+        stat_buf = fill!(Memory{UInt8}(undef, Int(ccall(:jl_sizeof_stat, Int32, ()))), 0x00)
         r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf)
-        if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL))
-            uv_error(string("stat(", repr($(esc(arg))), ")"), r)
-        end
-        st = StatStruct($(esc(arg)), stat_buf)
-        if ispath(st) != (r == 0)
-            error("stat returned zero type for a valid path")
-        end
-        return st
+        return checkstat(StatStruct($(esc(arg)), stat_buf, r))
     end
 end
 
 stat(fd::OS_HANDLE)         = @stat_call jl_fstat OS_HANDLE fd
-stat(path::AbstractString)  = @stat_call jl_stat  Cstring path
-lstat(path::AbstractString) = @stat_call jl_lstat Cstring path
+function stat(path::AbstractString)
+    # @info "stat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace())
+    @stat_call jl_stat  Cstring path
+end
+function lstat(path::AbstractString)
+    # @info "lstat($(repr(path)))" exception=(ErrorException("Fake error for backtrace printing"),stacktrace())
+    @stat_call jl_lstat Cstring path
+end
 if RawFD !== OS_HANDLE
     global stat(fd::RawFD)  = stat(Libc._get_osfhandle(fd))
 end
-stat(fd::Integer)           = stat(RawFD(fd))
 
 """
     stat(file)
+    stat(joinpath...)
 
 Return a structure whose fields contain information about the file.
 The fields of the structure are:
@@ -213,16 +222,19 @@ The fields of the structure are:
 | mtime   | `Float64`                       | Unix timestamp of when the file was last modified                  |
 | ctime   | `Float64`                       | Unix timestamp of when the file's metadata was changed             |
 """
+stat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("stat not implemented for $(typeof(path))") : stat(path2))
 stat(path...) = stat(joinpath(path...))
 
 """
     lstat(file)
+    lstat(joinpath...)
 
 Like [`stat`](@ref), but for symbolic links gets the info for the link
 itself rather than the file it refers to.
 This function must be called on a file path rather than a file object or a file
 descriptor.
 """
+lstat(path) = (path2 = joinpath(path); path2 isa typeof(path) ? error("lstat not implemented for $(typeof(path))") : lstat(path2))
 lstat(path...) = lstat(joinpath(path...))
 
 # some convenience functions
@@ -325,7 +337,7 @@ Return `true` if a valid filesystem entity exists at `path`,
 otherwise returns `false`.
 This is the generalization of [`isfile`](@ref), [`isdir`](@ref) etc.
 """
-ispath(st::StatStruct) = filemode(st) & 0xf000 != 0x0000
+ispath(st::StatStruct) = st.ioerrno == 0
 function ispath(path::String)
     # We use `access()` and `F_OK` to determine if a given path exists. `F_OK` comes from `unistd.h`.
     F_OK = 0x00
diff --git a/base/stream.jl b/base/stream.jl
index a45307b883da8..93aeead79eb9c 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -462,7 +462,7 @@ function closewrite(s::LibuvStream)
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we won't get spurious notifications later
@@ -1076,7 +1076,7 @@ function uv_write(s::LibuvStream, p::Ptr{UInt8}, n::UInt)
         # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
         sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(uvw) != C_NULL
             # uvw is still alive,
             # so make sure we won't get spurious notifications later
diff --git a/base/strings/annotated.jl b/base/strings/annotated.jl
index 75e1dec570ff4..9a0b4b2825436 100644
--- a/base/strings/annotated.jl
+++ b/base/strings/annotated.jl
@@ -39,13 +39,6 @@ the combined range.
 See also [`AnnotatedChar`](@ref), [`annotatedstring`](@ref),
 [`annotations`](@ref), and [`annotate!`](@ref).
 
-!!! warning
-    While the constructors are part of the Base public API, the fields
-    of `AnnotatedString` are not. This is to allow for potential future
-    changes in the implementation of this type. Instead use the
-    [`annotations`](@ref), and [`annotate!`](@ref) getter/setter
-    functions.
-
 # Constructors
 
 ```julia
@@ -81,13 +74,6 @@ More specifically, this is a simple wrapper around any other
 See also: [`AnnotatedString`](@ref), [`annotatedstring`](@ref), `annotations`,
 and `annotate!`.
 
-!!! warning
-    While the constructors are part of the Base public API, the fields
-    of `AnnotatedChar` are not. This it to allow for potential future
-    changes in the implementation of this type. Instead use the
-    [`annotations`](@ref), and [`annotate!`](@ref) getter/setter
-    functions.
-
 # Constructors
 
 ```julia
@@ -384,7 +370,7 @@ a vector of region–annotation tuples.
 In accordance with the semantics documented in [`AnnotatedString`](@ref), the
 order of annotations returned matches the order in which they were applied.
 
-See also: `annotate!`.
+See also: [`annotate!`](@ref).
 """
 annotations(s::AnnotatedString) = s.annotations
 
@@ -593,6 +579,7 @@ function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{Tuple{U
         for i in reverse(axes(annotations, 1))
             annot = annotations[i]
             first(first(annot)) == 1 || continue
+            i <= length(io.annotations) || continue
             if last(annot) == last(last(io.annotations))
                 valid_run = true
                 for runlen in 1:i
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 2d5f0cea26e36..bf11199143c1e 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -146,9 +146,8 @@ Stacktrace:
 
 Return a tuple of the character in `s` at index `i` with the index of the start
 of the following character in `s`. This is the key method that allows strings to
-be iterated, yielding a sequences of characters. If `i` is out of bounds in `s`
-then a bounds error is raised. The `iterate` function, as part of the iteration
-protocol may assume that `i` is the start of a character in `s`.
+be iterated, yielding a sequences of characters. The `iterate` function, as part
+of the iteration protocol may assume that `i` is the start of a character in `s`.
 
 See also [`getindex`](@ref), [`checkbounds`](@ref).
 """
diff --git a/base/strings/io.jl b/base/strings/io.jl
index 9204310129729..df34712b519d5 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -51,6 +51,8 @@ function print(io::IO, xs...)
     return nothing
 end
 
+setfield!(typeof(print).name.mt, :max_args, 10, :monotonic)
+
 """
     println([io::IO], xs...)
 
@@ -74,6 +76,7 @@ julia> String(take!(io))
 """
 println(io::IO, xs...) = print(io, xs..., "\n")
 
+setfield!(typeof(println).name.mt, :max_args, 10, :monotonic)
 ## conversion of general objects to strings ##
 
 """
@@ -149,6 +152,7 @@ function print_to_string(xs...)
     end
     String(_unsafe_take!(s))
 end
+setfield!(typeof(print_to_string).name.mt, :max_args, 10, :monotonic)
 
 function string_with_env(env, xs...)
     if isempty(xs)
@@ -246,7 +250,7 @@ end
 
 # optimized methods to avoid iterating over chars
 write(io::IO, s::Union{String,SubString{String}}) =
-    GC.@preserve s Int(unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))))::Int
+    GC.@preserve s (unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))) % Int)::Int
 print(io::IO, s::Union{String,SubString{String}}) = (write(io, s); nothing)
 
 """
@@ -354,12 +358,22 @@ function join(io::IO, iterator, delim="")
 end
 
 function _join_preserve_annotations(iterator, args...)
-    if _isannotated(eltype(iterator)) || any(_isannotated, args)
+    et = @default_eltype(iterator)
+    if isconcretetype(et) && !_isannotated(et) && !any(_isannotated, args)
+        sprint(join, iterator, args...)
+    else
         io = AnnotatedIOBuffer()
         join(io, iterator, args...)
-        read(seekstart(io), AnnotatedString{String})
-    else
-        sprint(join, iterator, args...)
+        # If we know (from compile time information, or dynamically in the case
+        # of iterators with a non-concrete eltype), that the result is annotated
+        # in nature, we extract an `AnnotatedString`, otherwise we just extract
+        # a plain `String` from `io`.
+        if isconcretetype(et) || !isempty(io.annotations)
+            seekstart(io)
+            read(io, AnnotatedString{String})
+        else
+            String(take!(io.io))
+        end
     end
 end
 
@@ -374,8 +388,8 @@ escape_nul(c::Union{Nothing, AbstractChar}) =
     (c !== nothing && '0' <= c <= '7') ? "\\x00" : "\\0"
 
 """
-    escape_string(str::AbstractString[, esc]; keep = ())::AbstractString
-    escape_string(io, str::AbstractString[, esc]; keep = ())::Nothing
+    escape_string(str::AbstractString[, esc]; keep=(), ascii=false, fullhex=false)::AbstractString
+    escape_string(io, str::AbstractString[, esc]; keep=())::Nothing
 
 General escaping of traditional C and Unicode escape sequences. The first form returns the
 escaped string, the second prints the result to `io`.
@@ -390,11 +404,23 @@ escaped by a prepending backslash (`\"` is also escaped by default in the first
 The argument `keep` specifies a collection of characters which are to be kept as
 they are. Notice that `esc` has precedence here.
 
+The argument `ascii` can be set to `true` to escape all non-ASCII characters,
+whereas the default `ascii=false` outputs printable Unicode characters as-is.
+(`keep` takes precedence over `ascii`.)
+
+The argument `fullhex` can be set to `true` to require all `\\u` escapes to be
+printed with 4 hex digits, and `\\U` escapes to be printed with 8 hex digits,
+whereas by default (`fullhex=false`) they are printed with fewer digits if
+possible (omitting leading zeros).
+
 See also [`unescape_string`](@ref) for the reverse operation.
 
 !!! compat "Julia 1.7"
     The `keep` argument is available as of Julia 1.7.
 
+!!! compat "Julia 1.12"
+    The `ascii` and `fullhex` arguments require Julia 1.12.
+
 # Examples
 ```jldoctest
 julia> escape_string("aaa\\nbbb")
@@ -413,7 +439,7 @@ julia> escape_string(string('\\u2135','\\0','0')) # \\0 would be ambiguous
 "ℵ\\\\x000"
 ```
 """
-function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
+function escape_string(io::IO, s::AbstractString, esc=""; keep = (), ascii::Bool=false, fullhex::Bool=false)
     a = Iterators.Stateful(s)
     for c::AbstractChar in a
         if c in esc
@@ -428,10 +454,10 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
             isprint(c)         ? print(io, c) :
                                  print(io, "\\x", string(UInt32(c), base = 16, pad = 2))
         elseif !isoverlong(c) && !ismalformed(c)
-            isprint(c)         ? print(io, c) :
-            c <= '\x7f'        ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
-            c <= '\uffff'      ? print(io, "\\u", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
-                                 print(io, "\\U", string(UInt32(c), base = 16, pad = need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
+            !ascii && isprint(c) ? print(io, c) :
+            c <= '\x7f'          ? print(io, "\\x", string(UInt32(c), base = 16, pad = 2)) :
+            c <= '\uffff'        ? print(io, "\\u", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 4 : 2)) :
+                                   print(io, "\\U", string(UInt32(c), base = 16, pad = fullhex || need_full_hex(peek(a)::Union{AbstractChar,Nothing}) ? 8 : 4))
         else # malformed or overlong
             u = bswap(reinterpret(UInt32, c)::UInt32)
             while true
@@ -442,8 +468,8 @@ function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
     end
 end
 
-escape_string(s::AbstractString, esc=('\"',); keep = ()) =
-    sprint((io)->escape_string(io, s, esc; keep = keep), sizehint=lastindex(s))
+escape_string(s::AbstractString, esc=('\"',); keep = (), ascii::Bool=false, fullhex::Bool=false) =
+    sprint((io)->escape_string(io, s, esc; keep, ascii, fullhex), sizehint=lastindex(s))
 
 function print_quoted(io, s::AbstractString)
     print(io, '"')
diff --git a/base/strings/search.jl b/base/strings/search.jl
index b9c14f06e0898..a481b3af775e0 100644
--- a/base/strings/search.jl
+++ b/base/strings/search.jl
@@ -10,7 +10,29 @@ match strings with [`match`](@ref).
 """
 abstract type AbstractPattern end
 
-nothing_sentinel(i) = i == 0 ? nothing : i
+# TODO: These unions represent bytes in memory that can be accessed via a pointer.
+# this property is used throughout Julia, e.g. also in IO code.
+# This deserves a better solution - see #53178.
+# If such a better solution comes in place, these unions should be replaced.
+const DenseInt8 = Union{
+    DenseArray{Int8},
+    FastContiguousSubArray{Int8,N,<:DenseArray} where N
+}
+
+# Note: This union is different from that above in that it includes CodeUnits.
+# Currently, this is redundant as CodeUnits <: DenseVector, but this subtyping
+# is buggy and may be removed in the future, see #54002
+const DenseUInt8 = Union{
+    DenseArray{UInt8},
+    FastContiguousSubArray{UInt8,N,<:DenseArray} where N,
+    CodeUnits{UInt8, <:Union{String, SubString{String}}},
+    FastContiguousSubArray{UInt8,N,<:CodeUnits{UInt8, <:Union{String, SubString{String}}}} where N,
+}
+
+const DenseUInt8OrInt8 = Union{DenseUInt8, DenseInt8}
+
+last_byteindex(x::Union{String, SubString{String}}) = ncodeunits(x)
+last_byteindex(x::DenseUInt8OrInt8) = lastindex(x)
 
 function last_utf8_byte(c::Char)
     u = reinterpret(UInt32, c)
@@ -30,11 +52,11 @@ function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar}
     end
     @inbounds isvalid(s, i) || string_index_err(s, i)
     c = pred.x
-    c ≤ '\x7f' && return nothing_sentinel(_search(s, c % UInt8, i))
+    c ≤ '\x7f' && return _search(s, first_utf8_byte(c), i)
     while true
         i = _search(s, first_utf8_byte(c), i)
-        i == 0 && return nothing
-        pred(s[i]) && return i
+        i === nothing && return nothing
+        isvalid(s, i) && pred(s[i]) && return i
         i = nextind(s, i)
     end
 end
@@ -47,31 +69,41 @@ const DenseBytes = Union{
     CodeUnits{UInt8, <:Union{String, SubString{String}}},
 }
 
-const ByteArray = Union{DenseBytes, DenseArrayType{Int8}}
+function findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{UInt8, Int8}}, a::Union{DenseInt8, DenseUInt8})
+    findnext(pred, a, firstindex(a))
+end
 
-findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) =
-    nothing_sentinel(_search(a, pred.x))
+function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer)
+    _search(a, pred.x, i)
+end
 
-findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
-    nothing_sentinel(_search(a, pred.x, i))
+function findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer)
+    _search(a, pred.x, i)
+end
 
-findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8)))
-findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i))
+# iszero is special, in that the bitpattern for zero for Int8 and UInt8 is the same,
+# so we can use memchr even if we search for an Int8 in an UInt8 array or vice versa
+findfirst(::typeof(iszero), a::DenseUInt8OrInt8) = _search(a, zero(UInt8))
+findnext(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _search(a, zero(UInt8), i)
 
-function _search(a::Union{String,SubString{String},<:ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1)
-    if i < 1
+function _search(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = firstindex(a))
+    fst = firstindex(a)
+    lst = last_byteindex(a)
+    if i < fst
         throw(BoundsError(a, i))
     end
-    n = sizeof(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
+    n_bytes = lst - i + 1
+    if i > lst
+        return i == lst+1 ? nothing : throw(BoundsError(a, i))
     end
-    p = pointer(a)
-    q = GC.@preserve a ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-1, b, n-i+1)
-    return q == C_NULL ? 0 : Int(q-p+1)
+    GC.@preserve a begin
+        p = pointer(a)
+        q = ccall(:memchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p+i-fst, b, n_bytes)
+    end
+    return q == C_NULL ? nothing : (q-p+fst) % Int
 end
 
-function _search(a::ByteArray, b::AbstractChar, i::Integer = 1)
+function _search(a::DenseUInt8, b::AbstractChar, i::Integer = firstindex(a))
     if isascii(b)
         _search(a,UInt8(b),i)
     else
@@ -80,41 +112,51 @@ function _search(a::ByteArray, b::AbstractChar, i::Integer = 1)
 end
 
 function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:AbstractChar},
-                  s::String, i::Integer)
+                  s::Union{String, SubString{String}}, i::Integer)
     c = pred.x
-    c ≤ '\x7f' && return nothing_sentinel(_rsearch(s, c % UInt8, i))
+    c ≤ '\x7f' && return _rsearch(s, first_utf8_byte(c), i)
     b = first_utf8_byte(c)
     while true
         i = _rsearch(s, b, i)
-        i == 0 && return nothing
-        pred(s[i]) && return i
+        i == nothing && return nothing
+        isvalid(s, i) && pred(s[i]) && return i
         i = prevind(s, i)
     end
 end
 
-findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray) =
-    nothing_sentinel(_rsearch(a, pred.x))
+function findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::DenseUInt8OrInt8)
+    findprev(pred, a, lastindex(a))
+end
 
-findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
-    nothing_sentinel(_rsearch(a, pred.x, i))
+function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},Int8}, a::DenseInt8, i::Integer)
+    _rsearch(a, pred.x, i)
+end
 
-findlast(::typeof(iszero), a::ByteArray) = nothing_sentinel(_rsearch(a, zero(UInt8)))
-findprev(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_rsearch(a, zero(UInt8), i))
+function findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},UInt8}, a::DenseUInt8, i::Integer)
+    _rsearch(a, pred.x, i)
+end
 
-function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a))
-    if i < 1
-        return i == 0 ? 0 : throw(BoundsError(a, i))
+# See comments above for findfirst(::typeof(iszero)) methods
+findlast(::typeof(iszero), a::DenseUInt8OrInt8) = _rsearch(a, zero(UInt8))
+findprev(::typeof(iszero), a::DenseUInt8OrInt8, i::Integer) = _rsearch(a, zero(UInt8), i)
+
+function _rsearch(a::Union{String,SubString{String},DenseUInt8OrInt8}, b::Union{Int8,UInt8}, i::Integer = last_byteindex(a))
+    fst = firstindex(a)
+    lst = last_byteindex(a)
+    if i < fst
+        return i == fst - 1 ? nothing : throw(BoundsError(a, i))
+    end
+    if i > lst
+        return i == lst+1 ? nothing : throw(BoundsError(a, i))
     end
-    n = sizeof(a)
-    if i > n
-        return i == n+1 ? 0 : throw(BoundsError(a, i))
+    GC.@preserve a begin
+        p = pointer(a)
+        q = ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i-fst+1)
     end
-    p = pointer(a)
-    q = GC.@preserve a ccall(:memrchr, Ptr{UInt8}, (Ptr{UInt8}, Int32, Csize_t), p, b, i)
-    return q == C_NULL ? 0 : Int(q-p+1)
+    return q == C_NULL ? nothing : (q-p+fst) % Int
 end
 
-function _rsearch(a::ByteArray, b::AbstractChar, i::Integer = length(a))
+function _rsearch(a::DenseUInt8, b::AbstractChar, i::Integer = length(a))
     if isascii(b)
         _rsearch(a,UInt8(b),i)
     else
@@ -136,7 +178,7 @@ function findall(
     i = firstindex(s)
     while true
         i = _search(s, byte, i)
-        iszero(i) && return result
+        isnothing(i) && return result
         i += 1
         index = i - ncu
         # If the char is invalid, it's possible that its first byte is
@@ -224,18 +266,19 @@ end
 
 in(c::AbstractChar, s::AbstractString) = (findfirst(isequal(c),s)!==nothing)
 
-function _searchindex(s::Union{AbstractString,ByteArray},
+function _searchindex(s::Union{AbstractString,DenseUInt8OrInt8},
                       t::Union{AbstractString,AbstractChar,Int8,UInt8},
                       i::Integer)
+    sentinel = firstindex(s) - 1
     x = Iterators.peel(t)
     if isnothing(x)
-        return 1 <= i <= nextind(s,lastindex(s))::Int ? i :
+        return firstindex(s) <= i <= nextind(s,lastindex(s))::Int ? i :
                throw(BoundsError(s, i))
     end
     t1, trest = x
     while true
         i = findnext(isequal(t1),s,i)
-        if i === nothing return 0 end
+        if i === nothing return sentinel end
         ii = nextind(s, i)::Int
         a = Iterators.Stateful(trest)
         matched = all(splat(==), zip(SubString(s, ii), a))
@@ -509,9 +552,8 @@ julia> findall(UInt8[1,2], UInt8[1,2,3,1,2])
 !!! compat "Julia 1.3"
      This method requires at least Julia 1.3.
 """
-
-function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}},
-                 s::Union{AbstractString, AbstractPattern, AbstractVector{<:Union{Int8,UInt8}}},
+function findall(t::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}},
+                 s::Union{AbstractString, AbstractPattern, AbstractVector{UInt8}},
                  ; overlap::Bool=false)
     found = UnitRange{Int}[]
     i, e = firstindex(s), lastindex(s)
@@ -564,7 +606,7 @@ function _rsearchindex(s::AbstractString,
     end
 end
 
-function _rsearchindex(s::String, t::String, i::Integer)
+function _rsearchindex(s::Union{String, SubString{String}}, t::Union{String, SubString{String}}, i::Integer)
     # Check for fast case of a single byte
     if lastindex(t) == 1
         return something(findprev(isequal(t[1]), s, i), 0)
diff --git a/base/strings/string.jl b/base/strings/string.jl
index 7917f463771b2..a46ee60e4f023 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -102,9 +102,11 @@ function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}})
     ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p)
 end
 
-# This is @assume_effects :effect_free :nothrow :terminates_globally @ccall jl_alloc_string(n::Csize_t)::Ref{String},
+# This is `@assume_effects :total !:consistent @ccall jl_alloc_string(n::Csize_t)::Ref{String}`,
 # but the macro is not available at this time in bootstrap, so we write it manually.
-@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0x000e)), :(convert(Csize_t, n))))
+const _string_n_override = 0x04ee
+@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String},
+    :(Core.svec(Csize_t)), 1, QuoteNode((:ccall, _string_n_override)), :(convert(Csize_t, n))))
 
 """
     String(s::AbstractString)
@@ -115,10 +117,7 @@ String(s::AbstractString) = print_to_string(s)
 @assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))
 
 unsafe_wrap(::Type{Memory{UInt8}}, s::String) = ccall(:jl_string_to_genericmemory, Ref{Memory{UInt8}}, (Any,), s)
-function unsafe_wrap(::Type{Vector{UInt8}}, s::String)
-    mem = unsafe_wrap(Memory{UInt8}, s)
-    view(mem, eachindex(mem))
-end
+unsafe_wrap(::Type{Vector{UInt8}}, s::String) = wrap(Array, unsafe_wrap(Memory{UInt8}, s))
 
 Vector{UInt8}(s::CodeUnits{UInt8,String}) = copyto!(Vector{UInt8}(undef, length(s)), s)
 Vector{UInt8}(s::String) = Vector{UInt8}(codeunits(s))
@@ -209,7 +208,7 @@ end
             i = i′
             @inbounds l = codeunit(s, i)
             (l < 0x80) | (0xf8 ≤ l) && return i+1
-            @assert l >= 0xc0
+            @assert l >= 0xc0 "invalid codeunit"
         end
         # first continuation byte
         (i += 1) > n && return i
@@ -571,9 +570,10 @@ julia> repeat('A', 3)
 ```
 """
 function repeat(c::AbstractChar, r::Integer)
+    r < 0 && throw(ArgumentError("can't repeat a character $r times"))
+    r = UInt(r)::UInt
     c = Char(c)::Char
     r == 0 && return ""
-    r < 0 && throw(ArgumentError("can't repeat a character $r times"))
     u = bswap(reinterpret(UInt32, c))
     n = 4 - (leading_zeros(u | 0xff) >> 3)
     s = _string_n(n*r)
diff --git a/base/strings/substring.jl b/base/strings/substring.jl
index 2a6b4ae7b9a22..50717d3c27e23 100644
--- a/base/strings/substring.jl
+++ b/base/strings/substring.jl
@@ -272,6 +272,7 @@ end
 
 function repeat(s::Union{String, SubString{String}}, r::Integer)
     r < 0 && throw(ArgumentError("can't repeat a string $r times"))
+    r = UInt(r)::UInt
     r == 0 && return ""
     r == 1 && return String(s)
     n = sizeof(s)
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index 42a4106d0f52f..ad047514c85a6 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -6,7 +6,7 @@ module Unicode
 import Base: show, ==, hash, string, Symbol, isless, length, eltype,
              convert, isvalid, ismalformed, isoverlong, iterate,
              AnnotatedString, AnnotatedChar, annotated_chartransform,
-             @assume_effects
+             @assume_effects, annotations
 
 # whether codepoints are valid Unicode scalar values, i.e. 0-0xd7ff, 0xe000-0x10ffff
 
@@ -256,6 +256,15 @@ julia> textwidth('⛵')
 ```
 """
 function textwidth(c::AbstractChar)
+    ismalformed(c) && return 1
+    i = codepoint(c)
+    i < 0x7f && return Int(i >= 0x20) # ASCII fast path
+    Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i))
+end
+
+function textwidth(c::Char)
+    b = bswap(reinterpret(UInt32, c)) # from isascii(c)
+    b < 0x7f && return Int(b >= 0x20) # ASCII fast path
     ismalformed(c) && return 1
     Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), c))
 end
diff --git a/base/strings/util.jl b/base/strings/util.jl
index 4b701001a8676..0ba76e1c76fa0 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -513,6 +513,154 @@ function rpad(
     r == 0 ? stringfn(s, p^q) : stringfn(s, p^q, first(p, r))
 end
 
+"""
+    rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the last characters
+with `replacement` if necessary. The default replacement string is "…".
+
+# Examples
+```jldoctest
+julia> s = rtruncate("🍕🍕 I love 🍕", 10)
+"🍕🍕 I lo…"
+
+julia> textwidth(s)
+10
+
+julia> rtruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`ltruncate`](@ref) and [`ctruncate`](@ref).
+"""
+function rtruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:right))
+    if isnothing(ret)
+        return string(str)
+    else
+        left, _ = ret::Tuple{Int,Int}
+        @views return str[begin:left] * replacement
+    end
+end
+
+"""
+    ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the first characters
+with `replacement` if necessary. The default replacement string is "…".
+
+# Examples
+```jldoctest
+julia> s = ltruncate("🍕🍕 I love 🍕", 10)
+"…I love 🍕"
+
+julia> textwidth(s)
+10
+
+julia> ltruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`rtruncate`](@ref) and [`ctruncate`](@ref).
+"""
+function ltruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…')
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:left))
+    if isnothing(ret)
+        return string(str)
+    else
+        _, right = ret::Tuple{Int,Int}
+        @views return replacement * str[right:end]
+    end
+end
+
+"""
+    ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true)
+
+Truncate `str` to at most `maxwidth` columns (as estimated by [`textwidth`](@ref)), replacing the middle characters
+with `replacement` if necessary. The default replacement string is "…". By default, the truncation
+prefers keeping chars on the left, but this can be changed by setting `prefer_left` to `false`.
+
+# Examples
+```jldoctest
+julia> s = ctruncate("🍕🍕 I love 🍕", 10)
+"🍕🍕 …e 🍕"
+
+julia> textwidth(s)
+10
+
+julia> ctruncate("foo", 3)
+"foo"
+```
+
+!!! compat "Julia 1.12"
+    This function was added in Julia 1.12.
+
+See also [`ltruncate`](@ref) and [`rtruncate`](@ref).
+"""
+function ctruncate(str::AbstractString, maxwidth::Integer, replacement::Union{AbstractString,AbstractChar} = '…'; prefer_left::Bool = true)
+    ret = string_truncate_boundaries(str, Int(maxwidth), replacement, Val(:center), prefer_left)
+    if isnothing(ret)
+        return string(str)
+    else
+        left, right = ret::Tuple{Int,Int}
+        @views return str[begin:left] * replacement * str[right:end]
+    end
+end
+
+function string_truncate_boundaries(
+            str::AbstractString,
+            maxwidth::Integer,
+            replacement::Union{AbstractString,AbstractChar},
+            ::Val{mode},
+            prefer_left::Bool = true) where {mode}
+
+    maxwidth >= 0 || throw(ArgumentError("maxwidth $maxwidth should be non-negative"))
+
+    # check efficiently for early return if str is less wide than maxwidth
+    total_width = 0
+    for c in str
+        total_width += textwidth(c)
+        total_width > maxwidth && break
+    end
+    total_width <= maxwidth && return nothing
+
+    l0, _ = left, right = firstindex(str), lastindex(str)
+    width = textwidth(replacement)
+    # used to balance the truncated width on either side
+    rm_width_left, rm_width_right, force_other = 0, 0, false
+    @inbounds while true
+        if mode === :left || (mode === :center && (!prefer_left || left > l0))
+            rm_width = textwidth(str[right])
+            if mode === :left || (rm_width_right <= rm_width_left || force_other)
+                force_other = false
+                (width += rm_width) <= maxwidth || break
+                rm_width_right += rm_width
+                right = prevind(str, right)
+            else
+                force_other = true
+            end
+        end
+        if mode ∈ (:right, :center)
+            rm_width = textwidth(str[left])
+            if mode === :left || (rm_width_left <= rm_width_right || force_other)
+                force_other = false
+                (width += textwidth(str[left])) <= maxwidth || break
+                rm_width_left += rm_width
+                left = nextind(str, left)
+            else
+                force_other = true
+            end
+        end
+    end
+    return prevind(str, left), nextind(str, right)
+end
+
 """
     eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
     eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index 3cb95396502a9..7dab313cf4f57 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -56,6 +56,8 @@ global STDLIB::String = "$BINDIR/../share/julia/stdlib/v$(VERSION.major).$(VERSI
 # In case STDLIB change after julia is built, the variable below can be used
 # to update cached method locations to updated ones.
 const BUILD_STDLIB_PATH = STDLIB
+# Similarly, this is the root of the julia repo directory that julia was built from
+const BUILD_ROOT_PATH = "$BINDIR/../.."
 
 # helper to avoid triggering precompile warnings
 
@@ -165,7 +167,7 @@ end
 # without pulling in anything unnecessary like `CPU_NAME`
 function __init_build()
     global BINDIR = ccall(:jl_get_julia_bindir, Any, ())::String
-    vers = "v$(VERSION.major).$(VERSION.minor)"
+    vers = "v$(string(VERSION.major)).$(string(VERSION.minor))"
     global STDLIB = abspath(BINDIR, "..", "share", "julia", "stdlib", vers)
     nothing
 end
diff --git a/base/task.jl b/base/task.jl
index cd06f0acd7f7d..6cb1ff785eeee 100644
--- a/base/task.jl
+++ b/base/task.jl
@@ -156,20 +156,10 @@ const task_state_runnable = UInt8(0)
 const task_state_done     = UInt8(1)
 const task_state_failed   = UInt8(2)
 
-const _state_index = findfirst(==(:_state), fieldnames(Task))
-@eval function load_state_acquire(t)
-    # TODO: Replace this by proper atomic operations when available
-    @GC.preserve t llvmcall($("""
-        %rv = load atomic i8, i8* %0 acquire, align 8
-        ret i8 %rv
-        """), UInt8, Tuple{Ptr{UInt8}},
-        Ptr{UInt8}(pointer_from_objref(t) + fieldoffset(Task, _state_index)))
-end
-
 @inline function getproperty(t::Task, field::Symbol)
     if field === :state
         # TODO: this field name should be deprecated in 2.0
-        st = load_state_acquire(t)
+        st = @atomic :acquire t._state
         if st === task_state_runnable
             return :runnable
         elseif st === task_state_done
@@ -223,7 +213,7 @@ julia> istaskdone(b)
 true
 ```
 """
-istaskdone(t::Task) = load_state_acquire(t) !== task_state_runnable
+istaskdone(t::Task) = (@atomic :acquire t._state) !== task_state_runnable
 
 """
     istaskstarted(t::Task) -> Bool
@@ -267,7 +257,7 @@ true
 !!! compat "Julia 1.3"
     This function requires at least Julia 1.3.
 """
-istaskfailed(t::Task) = (load_state_acquire(t) === task_state_failed)
+istaskfailed(t::Task) = ((@atomic :acquire t._state) === task_state_failed)
 
 Threads.threadid(t::Task) = Int(ccall(:jl_get_task_tid, Int16, (Any,), t)+1)
 function Threads.threadpool(t::Task)
@@ -320,6 +310,7 @@ end
 
 # just wait for a task to be done, no error propagation
 function _wait(t::Task)
+    t === current_task() && Core.throw(ConcurrencyViolationError("deadlock detected: cannot wait on current task"))
     if !istaskdone(t)
         donenotify = t.donenotify::ThreadSynchronizer
         lock(donenotify)
@@ -374,7 +365,6 @@ in an error, thrown as a [`TaskFailedException`](@ref) which wraps the failed ta
 Throws a `ConcurrencyViolationError` if `t` is the currently running task, to prevent deadlocks.
 """
 function wait(t::Task; throw=true)
-    t === current_task() && Core.throw(ConcurrencyViolationError("deadlock detected: cannot wait on current task"))
     _wait(t)
     if throw && istaskfailed(t)
         Core.throw(TaskFailedException(t))
@@ -813,6 +803,17 @@ macro sync_add(expr)
     end
 end
 
+function repl_backend_task()
+    @isdefined(active_repl_backend) || return
+    backend = active_repl_backend
+    isdefined(backend, :backend_task) || return
+    backend_task = getfield(active_repl_backend, :backend_task)::Task
+    if backend_task._state === task_state_runnable && getfield(backend, :in_eval)
+        return backend_task
+    end
+    return
+end
+
 # runtime system hook called when a task finishes
 function task_done_hook(t::Task)
     # `finish_task` sets `sigatomic` before entering this function
@@ -834,10 +835,9 @@ function task_done_hook(t::Task)
     end
 
     if err && !handled && Threads.threadid() == 1
-        if isa(result, InterruptException) && isdefined(Base, :active_repl_backend) &&
-            active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) &&
-            active_repl_backend.in_eval
-            throwto(active_repl_backend.backend_task, result) # this terminates the task
+        if isa(result, InterruptException) && isempty(Workqueue)
+            backend = repl_backend_task()
+            backend isa Task && throwto(backend, result)
         end
     end
     # Clear sigatomic before waiting
@@ -848,14 +848,11 @@ function task_done_hook(t::Task)
         # If an InterruptException happens while blocked in the event loop, try handing
         # the exception to the REPL task since the current task is done.
         # issue #19467
-        if Threads.threadid() == 1 &&
-            isa(e, InterruptException) && isdefined(Base, :active_repl_backend) &&
-            active_repl_backend.backend_task._state === task_state_runnable && isempty(Workqueue) &&
-            active_repl_backend.in_eval
-            throwto(active_repl_backend.backend_task, e)
-        else
-            rethrow()
+        if Threads.threadid() == 1 && isa(e, InterruptException) && isempty(Workqueue)
+            backend = repl_backend_task()
+            backend isa Task && throwto(backend, e)
         end
+        rethrow() # this will terminate the program
     end
 end
 
@@ -1029,7 +1026,7 @@ function schedule(t::Task, @nospecialize(arg); error=false)
     # schedule a task to be (re)started with the given value or exception
     t._state === task_state_runnable || Base.error("schedule: Task not runnable")
     if error
-        t.queue === nothing || Base.list_deletefirst!(t.queue::IntrusiveLinkedList{Task}, t)
+        q = t.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, t)
         setfield!(t, :result, arg)
         setfield!(t, :_isexception, true)
     else
@@ -1053,7 +1050,7 @@ function yield()
     try
         wait()
     catch
-        ct.queue === nothing || list_deletefirst!(ct.queue::IntrusiveLinkedList{Task}, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         rethrow()
     end
 end
diff --git a/base/terminfo.jl b/base/terminfo.jl
index 6f1d1ca8015f0..8ea8387077d36 100644
--- a/base/terminfo.jl
+++ b/base/terminfo.jl
@@ -245,7 +245,8 @@ end
 Locate the terminfo file for `term`, return `nothing` if none could be found.
 
 The lookup policy is described in `terminfo(5)` "Fetching Compiled
-Descriptions".
+Descriptions". A terminfo database is included by default with Julia and is
+taken to be the first entry of `@TERMINFO_DIRS@`.
 """
 function find_terminfo_file(term::String)
     isempty(term) && return
@@ -261,6 +262,7 @@ function find_terminfo_file(term::String)
         append!(terminfo_dirs,
                 replace(split(ENV["TERMINFO_DIRS"], ':'),
                         "" => "/usr/share/terminfo"))
+    push!(terminfo_dirs, normpath(Sys.BINDIR, DATAROOTDIR, "julia", "terminfo"))
     Sys.isunix() &&
         push!(terminfo_dirs, "/etc/terminfo", "/lib/terminfo", "/usr/share/terminfo")
     for dir in terminfo_dirs
@@ -268,8 +270,15 @@ function find_terminfo_file(term::String)
             return joinpath(dir, chr, term)
         elseif isfile(joinpath(dir, chrcode, term))
             return joinpath(dir, chrcode, term)
+        elseif isfile(joinpath(dir, lowercase(chr), lowercase(term)))
+            # The vendored terminfo database is fully lowercase to avoid issues on
+            # case-sensitive filesystems. On Unix-like systems, terminfo files with
+            # different cases are hard links to one another, so this is still
+            # correct for non-vendored terminfo, just redundant.
+            return joinpath(dir, lowercase(chr), lowercase(term))
         end
     end
+    return nothing
 end
 
 """
diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl
index 7212cb664f37e..a21d708b4a077 100644
--- a/base/threadingconstructs.jl
+++ b/base/threadingconstructs.jl
@@ -4,10 +4,10 @@ export threadid, nthreads, @threads, @spawn,
        threadpool, nthreadpools
 
 """
-    Threads.threadid() -> Int
+    Threads.threadid([t::Task]) -> Int
 
-Get the ID number of the current thread of execution. The master thread has
-ID `1`.
+Get the ID number of the current thread of execution, or the thread of task
+`t`. The master thread has ID `1`.
 
 # Examples
 ```julia-repl
@@ -21,12 +21,15 @@ julia> Threads.@threads for i in 1:4
 2
 5
 4
+
+julia> Threads.threadid(Threads.@spawn "foo")
+2
 ```
 
 !!! note
     The thread that a task runs on may change if the task yields, which is known as [`Task Migration`](@ref man-task-migration).
-    For this reason in most cases it is not safe to use `threadid()` to index into, say, a vector of buffer or stateful objects.
-
+    For this reason in most cases it is not safe to use `threadid([task])` to index into, say, a vector of buffers or stateful
+    objects.
 """
 threadid() = Int(ccall(:jl_threadid, Int16, ())+1)
 
@@ -93,6 +96,24 @@ function threadpool(tid = threadid())
     return _tpid_to_sym(tpid)
 end
 
+"""
+    Threads.threadpooldescription(tid = threadid()) -> String
+
+Returns the specified thread's threadpool name with extended description where appropriate.
+"""
+function threadpooldescription(tid = threadid())
+    threadpool_name = threadpool(tid)
+    if threadpool_name == :foreign
+        # TODO: extend tls to include a field to add a description to a foreign thread and make this more general
+        n_others = nthreads(:interactive) + nthreads(:default)
+        # Assumes GC threads come first in the foreign thread pool
+        if tid > n_others && tid <= n_others + ngcthreads()
+            return "foreign: gc"
+        end
+    end
+    return string(threadpool_name)
+end
+
 """
     Threads.nthreadpools() -> Int
 
diff --git a/base/timing.jl b/base/timing.jl
index 7e5891e2a3954..b094aa230e1c2 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# This type must be kept in sync with the C struct in src/gc.h
+# This type must be kept in sync with the C struct in src/gc-interface.h
 struct GC_Num
     allocd          ::Int64 # GC internal
     deferred_alloc  ::Int64 # GC internal
@@ -47,7 +47,7 @@ gc_total_bytes(gc_num::GC_Num) =
     gc_num.allocd + gc_num.deferred_alloc + gc_num.total_allocd
 
 function GC_Diff(new::GC_Num, old::GC_Num)
-    # logic from `src/gc.c:jl_gc_total_bytes`
+    # logic from `jl_gc_total_bytes`
     old_allocd = gc_total_bytes(old)
     new_allocd = gc_total_bytes(new)
     return GC_Diff(new_allocd       - old_allocd,
@@ -104,6 +104,33 @@ function gc_page_utilization_data()
     return Base.unsafe_wrap(Array, page_utilization_raw, JL_GC_N_MAX_POOLS, own=false)
 end
 
+# must be kept in sync with `src/gc-stock.h``
+const FULL_SWEEP_REASONS = [:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL, :FULL_SWEEP_REASON_FORCED_FULL_SWEEP,
+                            :FULL_SWEEP_REASON_USER_MAX_EXCEEDED, :FULL_SWEEP_REASON_LARGE_PROMOTION_RATE]
+
+"""
+    Base.full_sweep_reasons()
+
+Return a dictionary of the number of times each full sweep reason has occurred.
+
+The reasons are:
+- `:FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL`: Full sweep was caused due to `always_full` being set in the GC debug environment
+- `:FULL_SWEEP_REASON_FORCED_FULL_SWEEP`: Full sweep was forced by `GC.gc(true)`
+- `:FULL_SWEEP_REASON_USER_MAX_EXCEEDED`: Full sweep was forced due to the system reaching the heap soft size limit
+- `:FULL_SWEEP_REASON_LARGE_PROMOTION_RATE`: Full sweep was forced by a large promotion rate across GC generations
+
+Note that the set of reasons is not guaranteed to be stable across minor versions of Julia.
+"""
+function full_sweep_reasons()
+    reason = cglobal(:jl_full_sweep_reasons, UInt64)
+    reasons_as_array = Base.unsafe_wrap(Vector{UInt64}, reason, length(FULL_SWEEP_REASONS), own=false)
+    d = Dict{Symbol, Int64}()
+    for (i, r) in enumerate(FULL_SWEEP_REASONS)
+        d[r] = reasons_as_array[i]
+    end
+    return d
+end
+
 """
     Base.jit_total_bytes()
 
@@ -179,7 +206,7 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
             print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
         end
         print(io, timestr, " seconds")
-        parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
+        parens = bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0 || compile_time > 0
         parens && print(io, " (")
         if bytes != 0 || allocs != 0
             allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
@@ -197,11 +224,14 @@ function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_confl
             print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
         end
         if lock_conflicts > 0
+            if bytes != 0 || allocs != 0 || gctime > 0
+                print(io, ", ")
+            end
             plural = lock_conflicts == 1 ? "" : "s"
-            print(io, ", ", lock_conflicts, " lock conflict$plural")
+            print(io, lock_conflicts, " lock conflict$plural")
         end
         if compile_time > 0
-            if bytes != 0 || allocs != 0 || gctime > 0
+            if bytes != 0 || allocs != 0 || gctime > 0 || lock_conflicts > 0
                 print(io, ", ")
             end
             print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
@@ -315,7 +345,8 @@ macro time(msg, ex)
     quote
         local ret = @timed $(esc(ex))
         local _msg = $(esc(msg))
-        time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg)
+        local _msg_str = _msg === nothing ? _msg : string(_msg)
+        time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, true; msg=_msg_str)
         ret.value
     end
 end
@@ -387,7 +418,8 @@ macro timev(msg, ex)
     quote
         local ret = @timed $(esc(ex))
         local _msg = $(esc(msg))
-        timev_print(ret.time*1e9, ret.gcstats, ret.lock_conflicts, (ret.compile_time*1e9, ret.recompile_time*1e9); msg=_msg)
+        local _msg_str = _msg === nothing ? _msg : string(_msg)
+        timev_print(ret.time*1e9, ret.gcstats, ret.lock_conflicts, (ret.compile_time*1e9, ret.recompile_time*1e9); msg=_msg_str)
         ret.value
     end
 end
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
index d50ca3b423c26..4d07cfed05d8a 100644
--- a/base/toml_parser.jl
+++ b/base/toml_parser.jl
@@ -38,7 +38,7 @@ const TOMLDict  = Dict{String, Any}
 # Parser #
 ##########
 
-mutable struct Parser
+mutable struct Parser{Dates}
     str::String
     # 1 character look ahead
     current_char::Char
@@ -84,14 +84,11 @@ mutable struct Parser
 
     # Filled in in case we are parsing a file to improve error messages
     filepath::Union{String, Nothing}
-
-    # Optionally populate with the Dates stdlib to change the type of Date types returned
-    Dates::Union{Module, Nothing}
 end
 
-function Parser(str::String; filepath=nothing)
+function Parser{Dates}(str::String; filepath=nothing) where {Dates}
     root = TOMLDict()
-    l = Parser(
+    l = Parser{Dates}(
             str,                  # str
             EOF_CHAR,             # current_char
             firstindex(str),      # pos
@@ -106,12 +103,12 @@ function Parser(str::String; filepath=nothing)
             IdSet{Any}(),         # static_arrays
             IdSet{TOMLDict}(),    # defined_tables
             root,
-            filepath,
-            nothing
+            filepath
         )
     startup(l)
     return l
 end
+
 function startup(l::Parser)
     # Populate our one character look-ahead
     c = eat_char(l)
@@ -122,8 +119,10 @@ function startup(l::Parser)
     end
 end
 
-Parser() = Parser("")
-Parser(io::IO) = Parser(read(io, String))
+Parser{Dates}() where {Dates} = Parser{Dates}("")
+Parser{Dates}(io::IO) where {Dates} = Parser{Dates}(read(io, String))
+
+# Parser(...) will be defined by TOML stdlib
 
 function reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing)
     p.str = str
@@ -492,8 +491,10 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String},
         d = d::TOMLDict
         key = dotted_keys[i]
         d = get!(TOMLDict, d, key)
-        if d isa Vector
+        if d isa Vector{Any}
             d = d[end]
+        elseif d isa Vector
+            return ParserError(ErrKeyAlreadyHasValue)
         end
         check && @try check_allowed_add_key(l, d, i == length(dotted_keys))
     end
@@ -534,7 +535,7 @@ function parse_array_table(l)::Union{Nothing, ParserError}
     end
     d = @try recurse_dict!(l, l.root, @view(table_key[1:end-1]), false)
     k = table_key[end]
-    old = get!(() -> [], d, k)
+    old = get!(() -> Any[], d, k)
     if old isa Vector
         if old in l.static_arrays
             return ParserError(ErrAddArrayToStaticArray)
@@ -543,7 +544,7 @@ function parse_array_table(l)::Union{Nothing, ParserError}
         return ParserError(ErrArrayTreatedAsDictionary)
     end
     d_new = TOMLDict()
-    push!(old, d_new)
+    push!(old::Vector{Any}, d_new)
     push!(l.defined_tables, d_new)
     l.active_table = d_new
 
@@ -665,41 +666,20 @@ end
 # Array #
 #########
 
-function push!!(v::Vector, el)
-    # Since these types are typically non-inferable, they are a big invalidation risk,
-    # and since it's used by the package-loading infrastructure the cost of invalidation
-    # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather
-    # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there
-    # is no ambiguity about what types of objects will be created.
-    T = eltype(v)
-    t = typeof(el)
-    if el isa T || t === T
-        push!(v, el::T)
-        return v
-    elseif T === Union{}
-        out = Vector{t}(undef, 1)
-        out[1] = el
-        return out
-    else
-        if T isa Union
-            newT = Any
-        else
-            newT = Union{T, typeof(el)}
-        end
-        new = Array{newT}(undef, length(v))
-        copy!(new, v)
-        return push!(new, el)
+function copyto_typed!(a::Vector{T}, b::Vector) where T
+    for i in 1:length(b)
+        a[i] = b[i]::T
     end
+    return nothing
 end
 
-function parse_array(l::Parser)::Err{Vector}
+function parse_array(l::Parser{Dates})::Err{Vector} where Dates
     skip_ws_nl(l)
-    array = Vector{Union{}}()
+    array = Vector{Any}()
     empty_array = accept(l, ']')
     while !empty_array
         v = @try parse_value(l)
-        # TODO: Worth to function barrier this?
-        array = push!!(array, v)
+        array = push!(array, v)
         # There can be an arbitrary number of newlines and comments before a value and before the closing bracket.
         skip_ws_nl(l)
         comma = accept(l, ',')
@@ -709,8 +689,40 @@ function parse_array(l::Parser)::Err{Vector}
             return ParserError(ErrExpectedCommaBetweenItemsArray)
         end
     end
-    push!(l.static_arrays, array)
-    return array
+    # check for static type throughout array
+    T = !isempty(array) ? typeof(array[1]) : Union{}
+    for el in array
+        if typeof(el) != T
+            T = Any
+            break
+        end
+    end
+    if T === Any
+        new = array
+    elseif T === String
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Bool
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Int64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === UInt64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Float64
+        new = Array{T}(undef, length(array))
+        copyto_typed!(new, array)
+    elseif T === Union{}
+        new = Any[]
+    elseif (T === TOMLDict) || (T == BigInt) || (T === UInt128) || (T === Int128) || (T <: Vector) ||
+        (T === Dates.Date) || (T === Dates.Time) || (T === Dates.DateTime)
+        # do nothing, leave as Vector{Any}
+        new = array
+    else @assert false end
+    push!(l.static_arrays, new)
+    return new
 end
 
 
@@ -1021,8 +1033,7 @@ function parse_datetime(l)
     return try_return_datetime(l, year, month, day, h, m, s, ms)
 end
 
-function try_return_datetime(p, year, month, day, h, m, s, ms)
-    Dates = p.Dates
+function try_return_datetime(p::Parser{Dates}, year, month, day, h, m, s, ms) where Dates
     if Dates !== nothing
         try
             return Dates.DateTime(year, month, day, h, m, s, ms)
@@ -1035,8 +1046,7 @@ function try_return_datetime(p, year, month, day, h, m, s, ms)
     end
 end
 
-function try_return_date(p, year, month, day)
-    Dates = p.Dates
+function try_return_date(p::Parser{Dates}, year, month, day) where Dates
     if Dates !== nothing
         try
             return Dates.Date(year, month, day)
@@ -1058,8 +1068,7 @@ function parse_local_time(l::Parser)
     return try_return_time(l, h, m, s, ms)
 end
 
-function try_return_time(p, h, m, s, ms)
-    Dates = p.Dates
+function try_return_time(p::Parser{Dates}, h, m, s, ms) where Dates
     if Dates !== nothing
         try
             return Dates.Time(h, m, s, ms)
diff --git a/base/tuple.jl b/base/tuple.jl
index cc22ea75c041c..fc213410cfd7c 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -503,7 +503,7 @@ end
 _findfirst_rec(f, i::Int, ::Tuple{}) = nothing
 _findfirst_rec(f, i::Int, t::Tuple) = (@inline; f(first(t)) ? i : _findfirst_rec(f, i+1, tail(t)))
 function _findfirst_loop(f::Function, t)
-    for i in 1:length(t)
+    for i in eachindex(t)
         f(t[i]) && return i
     end
     return nothing
@@ -537,7 +537,7 @@ function _isequal(t1::Tuple{Any,Vararg{Any}}, t2::Tuple{Any,Vararg{Any}})
     return isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2))
 end
 function _isequal(t1::Any32, t2::Any32)
-    for i = 1:length(t1)
+    for i in eachindex(t1, t2)
         if !isequal(t1[i], t2[i])
             return false
         end
@@ -568,7 +568,7 @@ function _eq_missing(t1::Tuple, t2::Tuple)
 end
 function _eq(t1::Any32, t2::Any32)
     anymissing = false
-    for i = 1:length(t1)
+    for i in eachindex(t1, t2)
         eq = (t1[i] == t2[i])
         if ismissing(eq)
             anymissing = true
diff --git a/base/util.jl b/base/util.jl
index e9883650abc77..95d62c4a16e1d 100644
--- a/base/util.jl
+++ b/base/util.jl
@@ -271,15 +271,29 @@ function securezero! end
 unsafe_securezero!(p::Ptr{Cvoid}, len::Integer=1) = Ptr{Cvoid}(unsafe_securezero!(Ptr{UInt8}(p), len))
 
 """
-    Base.getpass(message::AbstractString) -> Base.SecretBuffer
+    Base.getpass(message::AbstractString; with_suffix::Bool=true) -> Base.SecretBuffer
 
 Display a message and wait for the user to input a secret, returning an `IO`
-object containing the secret.
+object containing the secret. If `with_suffix` is `true` (the default), the
+suffix `": "` will be appended to `message`.
 
 !!! info "Windows"
     Note that on Windows, the secret might be displayed as it is typed; see
     `Base.winprompt` for securely retrieving username/password pairs from a
     graphical interface.
+
+!!! compat "Julia 1.12"
+    The `with_suffix` keyword argument requires at least Julia 1.12.
+
+# Examples
+
+```julia-repl
+julia> Base.getpass("Secret")
+Secret: SecretBuffer("*******")
+
+julia> Base.getpass("Secret> "; with_suffix=false)
+Secret> SecretBuffer("*******")
+```
 """
 function getpass end
 
@@ -339,11 +353,13 @@ function with_raw_tty(f::Function, input::TTY)
     end
 end
 
-function getpass(input::TTY, output::IO, prompt::AbstractString)
+function getpass(input::TTY, output::IO, prompt::AbstractString; with_suffix::Bool=true)
     input === stdin || throw(ArgumentError("getpass only works for stdin"))
     with_raw_tty(stdin) do
-        print(output, prompt, ": ")
+        print(output, prompt)
+        with_suffix && print(output, ": ")
         flush(output)
+
         s = SecretBuffer()
         plen = 0
         while true
@@ -364,7 +380,7 @@ end
 
 # allow new getpass methods to be defined if stdin has been
 # redirected to some custom stream, e.g. in IJulia.
-getpass(prompt::AbstractString) = getpass(stdin, stdout, prompt)
+getpass(prompt::AbstractString; with_suffix::Bool=true) = getpass(stdin, stdout, prompt; with_suffix)
 
 """
     prompt(message; default="") -> Union{String, Nothing}
diff --git a/cli/Makefile b/cli/Makefile
index bbe722f6f4816..3cc0af1a76afd 100644
--- a/cli/Makefile
+++ b/cli/Makefile
@@ -25,8 +25,6 @@ else ifeq ($(OS),FreeBSD)
 LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed
 else ifeq ($(OS),OpenBSD)
 LOADER_LDFLAGS += -Wl,--no-as-needed -lpthread -rdynamic -lc -Wl,--as-needed
-else ifeq ($(OS),Darwin)
-LOADER_LDFLAGS += -lSystem
 endif
 
 # Build list of dependent libraries that must be opened
@@ -152,7 +150,7 @@ $(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT)
 $(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir)
 	@$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug)
 
-$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in
+$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION
 	sed <'$<' >'$@' -e 's/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/'
 
 clean: | $(CLEAN_TARGETS)
diff --git a/cli/loader_lib.c b/cli/loader_lib.c
index 65a5e7621a714..af2a36cfce8ab 100644
--- a/cli/loader_lib.c
+++ b/cli/loader_lib.c
@@ -546,7 +546,7 @@ __attribute__((constructor)) void jl_load_libjulia_internal(void) {
         (*jl_codegen_exported_func_addrs[symbol_idx]) = addr;
     }
     // Next, if we're on Linux/FreeBSD, set up fast TLS.
-#if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_) && !defined(_OS_OPENBSD_)
+#if !defined(_OS_WINDOWS_) && !defined(_OS_OPENBSD_)
     void (*jl_pgcstack_setkey)(void*, void*(*)(void)) = lookup_symbol(libjulia_internal, "jl_pgcstack_setkey");
     if (jl_pgcstack_setkey == NULL) {
         jl_loader_print_stderr("ERROR: Cannot find jl_pgcstack_setkey() function within libjulia-internal!\n");
diff --git a/contrib/bolt/.gitignore b/contrib/bolt/.gitignore
new file mode 100644
index 0000000000000..921d429130268
--- /dev/null
+++ b/contrib/bolt/.gitignore
@@ -0,0 +1,10 @@
+profiles-bolt*
+optimized.build
+toolchain
+
+bolt
+bolt_instrument
+merge_data
+copy_originals
+stage0
+stage1
diff --git a/contrib/bolt/Makefile b/contrib/bolt/Makefile
new file mode 100644
index 0000000000000..ea92ba9ff936a
--- /dev/null
+++ b/contrib/bolt/Makefile
@@ -0,0 +1,134 @@
+.PHONY: clean clean_profiles restore_originals
+
+# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs
+BOLT_ARGS :=
+# Reorder basic blocks within functions
+BOLT_ARGS += -reorder-blocks=ext-tsp
+# Reorder functions within the binary
+BOLT_ARGS += -reorder-functions=cdsort
+# Split function code into hot and code regions
+BOLT_ARGS += -split-functions
+# Split as many basic blocks as possible
+BOLT_ARGS += -split-all-cold
+# Move jump tables to a separate section
+BOLT_ARGS += -jump-tables=move
+# Use regular size pages for code alignment
+BOLT_ARGS += -no-huge-pages
+# Fold functions with identical code
+BOLT_ARGS += -icf=1
+# Split using best available strategy (three-way splitting, Cache-Directed Sort)
+# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed
+# BOLT_ARGS += -split-strategy=cdsplit
+# Update DWARF debug info in the final binary
+BOLT_ARGS += -update-debug-sections
+# Print optimization statistics
+BOLT_ARGS += -dyno-stats
+# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117
+# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2)
+# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text
+# which we do in the bolt target
+BOLT_ARGS += -skip-funcs=.\*apply_cl.\*
+
+# -fno-reorder-blocks-and-partition is needed on gcc >= 8.
+BOLT_FLAGS := $\
+	"BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\
+	"BOLT_LDFLAGS=-Wl,--emit-relocs"
+
+STAGE0_BUILD:=$(CURDIR)/toolchain
+STAGE1_BUILD:=$(CURDIR)/optimized.build
+
+STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/
+
+PROFILE_DIR:=$(CURDIR)/profiles-bolt
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt
+LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata
+
+# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file.
+SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so
+FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done)
+
+AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage1` to finish off the build. $\
+	You can now optionally collect more profiling data by running Julia with an appropriate workload, $\
+	if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage1`. $\
+	You should end up with some data in $(PROFILE_DIR). Afterwards run `make merge_data && make bolt`.'
+
+$(STAGE0_BUILD) $(STAGE1_BUILD):
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: | $(STAGE0_BUILD)
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-BOLT && \
+	touch $@
+
+# Build with our custom flags, binary builder doesn't use them so we need to build LLVM for now.
+# We manually skip package image creation so that we can profile it
+$(STAGE1_BUILD): stage0
+stage1: export USE_BINARYBUILDER_LLVM=0
+stage1: | $(STAGE1_BUILD)
+	$(MAKE) -C $(STAGE1_BUILD) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \
+								julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \
+	touch $@
+
+copy_originals: stage1
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		cp $$abs_file "$$abs_file.original"; \
+	done && \
+	touch $@
+
+# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data
+# as the final build uses -no-huge-pages
+bolt_instrument: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(PROFILE_DIR)/$$file-prof" -no-huge-pages; \
+		mkdir -p $$(dirname "$(PROFILE_DIR)/$$file-prof"); \
+		printf "\n"; \
+	done && \
+	touch $@
+	@echo $(AFTER_INSTRUMENT_MESSAGE)
+
+# We don't want to rebuild julia-src as then we lose the bolt instrumentation
+# So we have to manually build the sysimage and package image
+finish_stage1: stage1
+	$(MAKE) -C $(STAGE1_BUILD) julia-base-cache && \
+	$(MAKE) -C $(STAGE1_BUILD) -f sysimage.mk sysimg-release && \
+	$(MAKE) -C $(STAGE1_BUILD) -f pkgimage.mk release
+
+merge_data: bolt_instrument
+	for file in $(FILES_TO_OPTIMIZE); do \
+		profiles=$(PROFILE_DIR)/$$file-prof.*.fdata; \
+		$(LLVM_MERGEFDATA) $$profiles > "$(PROFILE_DIR)/$$file-prof.merged.fdata"; \
+	done && \
+	touch $@
+
+# The --use-old-text saves about 16 MiB of libLLVM.so size.
+# However, the rust folk found it succeeds very non-deterministically for them.
+# It tries to reuse old text segments to reduce binary size
+# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal
+# That flag saves less than 1 MiB for libjulia-internal so oh well.
+bolt: merge_data
+	for file in $(FILES_TO_OPTIMIZE); do \
+        abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -data "$(PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE1_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \
+    done && \
+    touch $@
+
+clean_profiles:
+	rm -rf $(PROFILE_DIR)
+
+clean:
+	rm -f stage0 stage1 bolt copy_originals merge_data bolt_instrument
+
+restore_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		cp -P "$$abs_file.original" $$abs_file; \
+	done
+
+delete_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE1_BUILD)/usr/lib/$$file; \
+		rm "$$abs_file.original"; \
+	done
diff --git a/contrib/bolt/README.md b/contrib/bolt/README.md
new file mode 100644
index 0000000000000..8680939ef6276
--- /dev/null
+++ b/contrib/bolt/README.md
@@ -0,0 +1,17 @@
+BOLT only works on x86_64 and arch64 on Linux.
+
+DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738.
+If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`.
+
+To build a BOLT-optimized version of Julia run the following commands (`cd` into this directory first)
+```bash
+make stage1
+make copy_originals
+make bolt_instrument
+make finish_stage1
+make merge_data
+make bolt
+```
+After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory.
+
+This doesn't align the code to support huge pages as it doesn't seem that we do that currently, this decreases the size of the .so files by 2-4mb.
diff --git a/contrib/check-whitespace.jl b/contrib/check-whitespace.jl
index e178ec8a02a38..fd3106587fb0d 100755
--- a/contrib/check-whitespace.jl
+++ b/contrib/check-whitespace.jl
@@ -18,6 +18,8 @@ const patterns = split("""
     *Makefile
 """)
 
+const is_gha = something(tryparse(Bool, get(ENV, "GITHUB_ACTIONS", "false")), false)
+
 # Note: `git ls-files` gives `/` as a path separator on Windows,
 #   so we just use `/` for all platforms.
 allow_tabs(path) =
@@ -63,8 +65,14 @@ function check_whitespace()
         for (path, lineno, msg) in sort!(collect(errors))
             if lineno == 0
                 println(stderr, "$path -- $msg")
+                if is_gha
+                    println(stdout, "::warning title=Whitespace check,file=", path, "::", msg)
+                end
             else
                 println(stderr, "$path:$lineno -- $msg")
+                if is_gha
+                    println(stdout, "::warning title=Whitespace check,file=", path, ",line=", lineno, "::", msg)
+                end
             end
         end
         exit(1)
diff --git a/contrib/download_cmake.sh b/contrib/download_cmake.sh
index 1deeb08ddded2..5cf3c579ed052 100755
--- a/contrib/download_cmake.sh
+++ b/contrib/download_cmake.sh
@@ -8,17 +8,17 @@ mkdir -p "$(dirname "$0")"/../deps/scratch
 cd "$(dirname "$0")"/../deps/scratch
 
 CMAKE_VERSION_MAJOR=3
-CMAKE_VERSION_MINOR=19
-CMAKE_VERSION_PATCH=3
+CMAKE_VERSION_MINOR=30
+CMAKE_VERSION_PATCH=1
 CMAKE_VERSION_MAJMIN=$CMAKE_VERSION_MAJOR.$CMAKE_VERSION_MINOR
 CMAKE_VERSION=$CMAKE_VERSION_MAJMIN.$CMAKE_VERSION_PATCH
 
 # listed at https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/cmake-$CMAKE_VERSION-SHA-256.txt
 # for the files cmake-$CMAKE_VERSION-macos-universal.tar.gz
 # cmake-$CMAKE_VERSION-Linux-x86_64.tar.gz and cmake-$CMAKE_VERSION-Linux-aarch64.tar.gz
-CMAKE_SHA256_DARWIN=a6b79ad05f89241a05797510e650354d74ff72cc988981cdd1eb2b3b2bda66ac
-CMAKE_SHA256_LINUX_X86_64=c18b65697e9679e5c88dccede08c323cd3d3730648e59048047bba82097e0ffc
-CMAKE_SHA256_LINUX_AARCH64=66e507c97ffb586d7ca6567890808b792c8eb004b645706df6fbf27826a395a2
+CMAKE_SHA256_DARWIN=51e12618829b811bba6f033ee8f39f6192da1b6abb20d82a7899d5134e879a4c
+CMAKE_SHA256_LINUX_X86_64=ac31f077ef3378641fa25a3cb980d21b2f083982d3149a8f2eb9154f2b53696b
+CMAKE_SHA256_LINUX_AARCH64=ad234996f8750f11d7bd0d17b03f55c434816adf1f1671aab9e8bab21a43286a
 
 PLATFORM="$(uname)-$(uname -m)"
 case $PLATFORM in
@@ -28,12 +28,12 @@ case $PLATFORM in
     echo "$CMAKE_SHA256_DARWIN  $FULLNAME.tar.gz" | shasum -a 256 -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/CMake.app/Contents/bin/cmake;;
   Linux-x86_64)
-    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
+    FULLNAME=cmake-$CMAKE_VERSION-linux-x86_64
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
     echo "$CMAKE_SHA256_LINUX_X86_64  $FULLNAME.tar.gz" | sha256sum -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
   Linux-aarch64)
-    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
+    FULLNAME=cmake-$CMAKE_VERSION-linux-aarch64
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
     echo "$CMAKE_SHA256_LINUX_AARCH64  $FULLNAME.tar.gz" | sha256sum -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index 1152e20d16842..60f7290c7a0ac 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -12,8 +12,6 @@ Sys.__init_build()
 if !isdefined(Base, :uv_eventloop)
     Base.reinit_stdio()
 end
-Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl"))
-import .FakePTYs: open_fake_pty
 using Base.Meta
 
 ## Debugging options
@@ -41,6 +39,15 @@ precompile(Base.__require_prelocked, (Base.PkgId, Nothing))
 precompile(Base._require, (Base.PkgId, Nothing))
 precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int))
 precompile(Base.indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int))
+precompile(Tuple{typeof(Base.Threads.atomic_add!), Base.Threads.Atomic{Int}, Int})
+precompile(Tuple{typeof(Base.Threads.atomic_sub!), Base.Threads.Atomic{Int}, Int})
+
+# LazyArtifacts (but more generally helpful)
+precompile(Tuple{Type{Base.Val{x} where x}, Module})
+precompile(Tuple{Type{NamedTuple{(:honor_overrides,), T} where T<:Tuple}, Tuple{Bool}})
+precompile(Tuple{typeof(Base.unique!), Array{String, 1}})
+precompile(Tuple{typeof(Base.invokelatest), Any})
+precompile(Tuple{typeof(Base.vcat), Array{String, 1}, Array{String, 1}})
 
 # Pkg loading
 precompile(Tuple{typeof(Base.Filesystem.normpath), String, String, Vararg{String}})
@@ -163,6 +170,8 @@ for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter())
     push!(Expr[], Expr(:return, false))
     vcat(String[], String[])
     k, v = (:hello => nothing)
+    Base.print_time_imports_report(Base)
+    Base.print_time_imports_report_init(Base)
 
     # Preferences uses these
     get(Dict{String,Any}(), "missing", nothing)
@@ -174,6 +183,11 @@ for match = Base._methods(+, (Int, Int), -1, Base.get_world_counter())
     # interactive startup uses this
     write(IOBuffer(), "")
 
+    # not critical, but helps hide unrelated compilation from @time when using --trace-compile
+    foo() = rand(2,2) * rand(2,2)
+    @time foo()
+    @time foo()
+
     break   # only actually need to do this once
 end
 """
@@ -333,8 +347,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
         print_state("step1" => "F$n_step1")
         return :ok
     end
-    Base.errormonitor(step1)
-    !PARALLEL_PRECOMPILATION && wait(step1)
+    PARALLEL_PRECOMPILATION ? bind(statements_step1, step1) : wait(step1)
 
     # Create a staging area where all the loaded packages are available
     PrecompileStagingArea = Module()
@@ -348,7 +361,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
     # Make statements unique
     statements = Set{String}()
     # Execute the precompile statements
-    for sts in [statements_step1,], statement in sts
+    for statement in statements_step1
         # Main should be completely clean
         occursin("Main.", statement) && continue
         Base.in!(statement, statements) && continue
@@ -384,6 +397,7 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
     println()
     # Seems like a reasonable number right now, adjust as needed
     # comment out if debugging script
+    have_repl = false
     n_succeeded > (have_repl ? 650 : 90) || @warn "Only $n_succeeded precompile statements"
 
     fetch(step1) == :ok || throw("Step 1 of collecting precompiles failed.")
@@ -394,7 +408,6 @@ generate_precompile_statements() = try # Make sure `ansi_enablecursor` is printe
 finally
     fancyprint && print(ansi_enablecursor)
     GC.gc(true); GC.gc(false); # reduce memory footprint
-    return
 end
 
 generate_precompile_statements()
diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl
index df17b967c1ed7..8b1eb55cbe4f4 100755
--- a/contrib/julia-config.jl
+++ b/contrib/julia-config.jl
@@ -67,9 +67,7 @@ function ldlibs(doframework)
         "julia"
     end
     if Sys.isunix()
-        return "-Wl,-rpath,$(shell_escape(libDir())) " *
-            (Sys.isapple() ? string() : "-Wl,-rpath,$(shell_escape(private_libDir())) ") *
-            "-l$libname"
+        return "-L$(shell_escape(private_libDir())) -Wl,-rpath,$(shell_escape(libDir())) -Wl,-rpath,$(shell_escape(private_libDir())) -l$libname"
     else
         return "-l$libname -lopenlibm"
     end
diff --git a/contrib/juliac-buildscript.jl b/contrib/juliac-buildscript.jl
new file mode 100644
index 0000000000000..50f96198c416b
--- /dev/null
+++ b/contrib/juliac-buildscript.jl
@@ -0,0 +1,277 @@
+# Script to run in the process that generates juliac's object file output
+
+inputfile = ARGS[1]
+output_type = ARGS[2]
+add_ccallables = ARGS[3] == "true"
+
+# Initialize some things not usually initialized when output is requested
+Sys.__init__()
+Base.init_depot_path()
+Base.init_load_path()
+Base.init_active_project()
+task = current_task()
+task.rngState0 = 0x5156087469e170ab
+task.rngState1 = 0x7431eaead385992c
+task.rngState2 = 0x503e1d32781c2608
+task.rngState3 = 0x3a77f7189200c20b
+task.rngState4 = 0x5502376d099035ae
+uuid_tuple = (UInt64(0), UInt64(0))
+ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple)
+ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
+
+# Patch methods in Core and Base
+
+@eval Core begin
+    DomainError(@nospecialize(val), @nospecialize(msg::AbstractString)) = (@noinline; $(Expr(:new, :DomainError, :val, :msg)))
+end
+
+(f::Base.RedirectStdStream)(io::Core.CoreSTDOUT) = Base._redirect_io_global(io, f.unix_fd)
+
+@eval Base begin
+    _assert_tostring(msg) = ""
+    reinit_stdio() = nothing
+    JuliaSyntax.enable_in_core!() = nothing
+    init_active_project() = ACTIVE_PROJECT[] = nothing
+    set_active_project(projfile::Union{AbstractString,Nothing}) = ACTIVE_PROJECT[] = projfile
+    disable_library_threading() = nothing
+    start_profile_listener() = nothing
+    @inline function invokelatest(f::F, args...; kwargs...) where F
+        return f(args...; kwargs...)
+    end
+    function sprint(f::F, args::Vararg{Any,N}; context=nothing, sizehint::Integer=0) where {F<:Function,N}
+        s = IOBuffer(sizehint=sizehint)
+        if context isa Tuple
+            f(IOContext(s, context...), args...)
+        elseif context !== nothing
+            f(IOContext(s, context), args...)
+        else
+            f(s, args...)
+        end
+        String(_unsafe_take!(s))
+    end
+    function show_typeish(io::IO, @nospecialize(T))
+        if T isa Type
+            show(io, T)
+        elseif T isa TypeVar
+            print(io, (T::TypeVar).name)
+        else
+            print(io, "?")
+        end
+    end
+    function show(io::IO, T::Type)
+        if T isa DataType
+            print(io, T.name.name)
+            if T !== T.name.wrapper && length(T.parameters) > 0
+                print(io, "{")
+                first = true
+                for p in T.parameters
+                    if !first
+                        print(io, ", ")
+                    end
+                    first = false
+                    if p isa Int
+                        show(io, p)
+                    elseif p isa Type
+                        show(io, p)
+                    elseif p isa Symbol
+                        print(io, ":")
+                        print(io, p)
+                    elseif p isa TypeVar
+                        print(io, p.name)
+                    else
+                        print(io, "?")
+                    end
+                end
+                print(io, "}")
+            end
+        elseif T isa Union
+            print(io, "Union{")
+            show_typeish(io, T.a)
+            print(io, ", ")
+            show_typeish(io, T.b)
+            print(io, "}")
+        elseif T isa UnionAll
+            print(io, T.body::Type)
+            print(io, " where ")
+            print(io, T.var.name)
+        end
+    end
+    show_type_name(io::IO, tn::Core.TypeName) = print(io, tn.name)
+
+    mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted; dims=:, init=_InitialValue()) where {F, F2} =
+    _mapreduce_dim(f, op, init, A, dims)
+    mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted...; kw...) where {F, F2} =
+        reduce(op, map(f, A...); kw...)
+
+    _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
+        mapfoldl_impl(f, op, nt, A)
+
+    _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, ::Colon) where {F, F2} =
+        _mapreduce(f, op, IndexStyle(A), A)
+
+    _mapreduce_dim(f::F, op::F2, nt, A::AbstractArrayOrBroadcasted, dims) where {F, F2} =
+        mapreducedim!(f, op, reducedim_initarray(A, dims, nt), A)
+
+    _mapreduce_dim(f::F, op::F2, ::_InitialValue, A::AbstractArrayOrBroadcasted, dims) where {F,F2} =
+        mapreducedim!(f, op, reducedim_init(f, op, A, dims), A)
+
+    mapreduce_empty_iter(f::F, op::F2, itr, ItrEltype) where {F, F2} =
+        reduce_empty_iter(MappingRF(f, op), itr, ItrEltype)
+        mapreduce_first(f::F, op::F2, x) where {F,F2} = reduce_first(op, f(x))
+
+    _mapreduce(f::F, op::F2, A::AbstractArrayOrBroadcasted) where {F,F2} = _mapreduce(f, op, IndexStyle(A), A)
+    mapreduce_empty(::typeof(identity), op::F, T) where {F} = reduce_empty(op, T)
+    mapreduce_empty(::typeof(abs), op::F, T) where {F}     = abs(reduce_empty(op, T))
+    mapreduce_empty(::typeof(abs2), op::F, T) where {F}    = abs2(reduce_empty(op, T))
+end
+@eval Base.Unicode begin
+    function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform::F = identity) where F
+        nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform)
+        buffer = Base.StringVector(nwords*4)
+        nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform)
+        nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options)
+        nbytes < 0 && utf8proc_error(nbytes)
+        return String(resize!(buffer, nbytes))
+    end
+end
+@eval Base.GMP begin
+    function __init__()
+        try
+            ccall((:__gmp_set_memory_functions, libgmp), Cvoid,
+                (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}),
+                cglobal(:jl_gc_counted_malloc),
+                cglobal(:jl_gc_counted_realloc_with_old_size),
+                cglobal(:jl_gc_counted_free_with_size))
+            ZERO.alloc, ZERO.size, ZERO.d = 0, 0, C_NULL
+            ONE.alloc, ONE.size, ONE.d = 1, 1, pointer(_ONE)
+        catch ex
+            Base.showerror_nostdio(ex, "WARNING: Error during initialization of module GMP")
+        end
+        # This only works with a patched version of GMP, ignore otherwise
+        try
+            ccall((:__gmp_set_alloc_overflow_function, libgmp), Cvoid,
+                (Ptr{Cvoid},),
+                cglobal(:jl_throw_out_of_memory_error))
+            ALLOC_OVERFLOW_FUNCTION[] = true
+        catch ex
+            # ErrorException("ccall: could not find function...")
+            if typeof(ex) != ErrorException
+                rethrow()
+            end
+        end
+    end
+end
+@eval Base.Sort begin
+    issorted(itr;
+        lt::T=isless, by::F=identity, rev::Union{Bool,Nothing}=nothing, order::Ordering=Forward) where {T,F} =
+        issorted(itr, ord(lt,by,rev,order))
+end
+@eval Base.TOML begin
+    function try_return_datetime(p, year, month, day, h, m, s, ms)
+        return DateTime(year, month, day, h, m, s, ms)
+    end
+    function try_return_date(p, year, month, day)
+        return Date(year, month, day)
+    end
+    function parse_local_time(l::Parser)
+        h = @try parse_int(l, false)
+        h in 0:23 || return ParserError(ErrParsingDateTime)
+        _, m, s, ms = @try _parse_local_time(l, true)
+        # TODO: Could potentially parse greater accuracy for the
+        # fractional seconds here.
+        return try_return_time(l, h, m, s, ms)
+    end
+    function try_return_time(p, h, m, s, ms)
+        return Time(h, m, s, ms)
+    end
+end
+
+# Load user code
+
+import Base.Experimental.entrypoint
+
+let mod = Base.include(Base.__toplevel__, inputfile)
+    if !isa(mod, Module)
+        mod = Main
+    end
+    if output_type == "--output-exe" && isdefined(mod, :main) && !add_ccallables
+        entrypoint(mod.main, ())
+    end
+    #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{Base.SubString{String}, 1}, String))
+    #entrypoint(join, (Base.GenericIOBuffer{Memory{UInt8}}, Array{String, 1}, Char))
+    entrypoint(Base.task_done_hook, (Task,))
+    entrypoint(Base.wait, ())
+    entrypoint(Base.trypoptask, (Base.StickyWorkqueue,))
+    entrypoint(Base.checktaskempty, ())
+    if add_ccallables
+        ccall(:jl_add_ccallable_entrypoints, Cvoid, ())
+    end
+end
+
+# Additional method patches depending on whether user code loads certain stdlibs
+
+let loaded = Symbol.(Base.loaded_modules_array())  # TODO better way to do this
+    if :SparseArrays in loaded
+        using SparseArrays
+        @eval SparseArrays.CHOLMOD begin
+            function __init__()
+                ccall((:SuiteSparse_config_malloc_func_set, :libsuitesparseconfig),
+                    Cvoid, (Ptr{Cvoid},), cglobal(:jl_malloc, Ptr{Cvoid}))
+                ccall((:SuiteSparse_config_calloc_func_set, :libsuitesparseconfig),
+                    Cvoid, (Ptr{Cvoid},), cglobal(:jl_calloc, Ptr{Cvoid}))
+                ccall((:SuiteSparse_config_realloc_func_set, :libsuitesparseconfig),
+                    Cvoid, (Ptr{Cvoid},), cglobal(:jl_realloc, Ptr{Cvoid}))
+                ccall((:SuiteSparse_config_free_func_set, :libsuitesparseconfig),
+                Cvoid, (Ptr{Cvoid},), cglobal(:jl_free, Ptr{Cvoid}))
+            end
+        end
+    end
+    if :Artifacts in loaded
+        using Artifacts
+        @eval Artifacts begin
+            function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, _::Val{lazyartifacts}) where lazyartifacts
+                moduleroot = Base.moduleroot(__module__)
+                if haskey(Base.module_keys, moduleroot)
+                    # Process overrides for this UUID, if we know what it is
+                    process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid)
+                end
+
+                # If the artifact exists, we're in the happy path and we can immediately
+                # return the path to the artifact:
+                dirs = artifact_paths(hash; honor_overrides=true)
+                for dir in dirs
+                    if isdir(dir)
+                        return jointail(dir, path_tail)
+                    end
+                end
+            end
+        end
+    end
+    if :Pkg in loaded
+        using Pkg
+        @eval Pkg begin
+            __init__() = rand() #TODO, methods that do nothing don't get codegened
+        end
+    end
+    if :StyledStrings in loaded
+        using StyledStrings
+        @eval StyledStrings begin
+            __init__() = rand()
+        end
+    end
+end
+
+empty!(Core.ARGS)
+empty!(Base.ARGS)
+empty!(LOAD_PATH)
+empty!(DEPOT_PATH)
+empty!(Base.TOML_CACHE.d)
+Base.TOML.reinit!(Base.TOML_CACHE.p, "")
+Base.ACTIVE_PROJECT[] = nothing
+@eval Base begin
+    PROGRAM_FILE = ""
+end
+@eval Sys begin
+    BINDIR = ""
+    STDLIB = ""
+end
diff --git a/contrib/juliac.jl b/contrib/juliac.jl
new file mode 100644
index 0000000000000..61e0e91958667
--- /dev/null
+++ b/contrib/juliac.jl
@@ -0,0 +1,110 @@
+# Julia compiler wrapper script
+# NOTE: The interface and location of this script are considered unstable/experimental
+
+cmd = Base.julia_cmd()
+cmd = `$cmd --startup-file=no --history-file=no`
+output_type = nothing  # exe, sharedlib, sysimage
+trim = nothing
+outname = nothing
+file = nothing
+add_ccallables = false
+
+help = findfirst(x->x == "--help", ARGS)
+if help !== nothing
+    println(
+        """
+        Usage: julia juliac.jl [--output-exe | --output-lib | --output-sysimage] <name> [options] <file.jl>
+        --trim=<no,safe,unsafe,unsafe-warn>  Only output code statically determined to be reachable
+        --compile-ccallable  Include all methods marked `@ccallable` in output
+        --verbose            Request verbose output
+        """)
+    exit(0)
+end
+
+let i = 1
+    while i <= length(ARGS)
+        arg = ARGS[i]
+        if arg == "--output-exe" || arg == "--output-lib" || arg == "--output-sysimage"
+            isnothing(output_type) || error("Multiple output types specified")
+            global output_type = arg
+            i == length(ARGS) && error("Output specifier requires an argument")
+            global outname = ARGS[i+1]
+            i += 1
+        elseif startswith(arg, "--trim")
+            arg = split(arg, '=')
+            if length(arg) == 1
+                global trim = "safe"
+            else
+                global trim = arg[2]
+            end
+        elseif arg == "--compile-ccallable"
+            global add_ccallables = true
+        else
+            if arg[1] == '-' || !isnothing(file)
+                println("Unexpected argument `$arg`")
+                exit(1)
+            end
+            global file = arg
+        end
+        i += 1
+    end
+end
+
+isnothing(outname) && error("No output file specified")
+isnothing(file) && error("No input file specified")
+
+absfile = abspath(file)
+cflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --cflags `)
+cflags = Base.shell_split(cflags)
+allflags = readchomp(`$(cmd) $(joinpath(Sys.BINDIR, Base.DATAROOTDIR,"julia", "julia-config.jl")) --allflags`)
+allflags = Base.shell_split(allflags)
+tmpdir = mktempdir(cleanup=false)
+initsrc_path = joinpath(tmpdir, "init.c")
+init_path = joinpath(tmpdir, "init.a")
+img_path = joinpath(tmpdir, "img.a")
+bc_path = joinpath(tmpdir, "img-bc.a")
+
+open(initsrc_path, "w") do io
+    print(io, """
+              #include <julia.h>
+              __attribute__((constructor)) void static_init(void) {
+                  if (jl_is_initialized())
+                      return;
+                  julia_init(JL_IMAGE_IN_MEMORY);
+                  jl_exception_clear();
+              }
+              """)
+end
+
+static_call_graph_arg() = isnothing(trim) ?  `` : `--trim=$(trim)`
+is_verbose() = verbose ? `--verbose-compilation=yes` : ``
+cmd = addenv(`$cmd --project=$(Base.active_project()) --output-o $img_path --output-incremental=no --strip-ir --strip-metadata $(static_call_graph_arg()) $(joinpath(@__DIR__,"juliac-buildscript.jl")) $absfile $output_type $add_ccallables`, "OPENBLAS_NUM_THREADS" => 1, "JULIA_NUM_THREADS" => 1)
+
+if !success(pipeline(cmd; stdout, stderr))
+    println(stderr, "\nFailed to compile $file")
+    exit(1)
+end
+
+run(`cc $(cflags) -g -c -o $init_path $initsrc_path`)
+
+if output_type == "--output-lib" || output_type == "--output-sysimage"
+    of, ext = splitext(outname)
+    soext = "." * Base.BinaryPlatforms.platform_dlext()
+    if ext == ""
+        outname = of * soext
+    end
+end
+
+julia_libs = Base.shell_split(Base.isdebugbuild() ? "-ljulia-debug -ljulia-internal-debug" : "-ljulia -ljulia-internal")
+try
+    if output_type == "--output-lib"
+        run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path  -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path  $(julia_libs)`)
+    elseif output_type == "--output-sysimage"
+        run(`cc $(allflags) -o $outname -shared -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path  -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE)             $(julia_libs)`)
+    else
+        run(`cc $(allflags) -o $outname -Wl,$(Base.Linking.WHOLE_ARCHIVE) $img_path -Wl,$(Base.Linking.NO_WHOLE_ARCHIVE) $init_path $(julia_libs)`)
+    end
+catch
+    println("\nCompilation failed.")
+    exit(1)
+end
diff --git a/contrib/pgo-lto-bolt/.gitignore b/contrib/pgo-lto-bolt/.gitignore
new file mode 100644
index 0000000000000..1b29279acc0da
--- /dev/null
+++ b/contrib/pgo-lto-bolt/.gitignore
@@ -0,0 +1,14 @@
+stage0*
+stage1*
+stage2*
+bolt
+bolt_instrument
+merge_data
+copy_originals
+
+profiles
+profiles-bolt
+
+toolchain
+pgo-instrumented.build
+optimized.build
diff --git a/contrib/pgo-lto-bolt/Makefile b/contrib/pgo-lto-bolt/Makefile
new file mode 100644
index 0000000000000..2114b14991184
--- /dev/null
+++ b/contrib/pgo-lto-bolt/Makefile
@@ -0,0 +1,188 @@
+.PHONY: clean clean_profiles restore_originals
+
+# See the makefiles in contrib/bolt and contrib/pgo-lto for more information.
+
+# Settings taken from https://github.com/rust-lang/rust/blob/master/src/tools/opt-dist/src/bolt.rs
+BOLT_ARGS :=
+# Reorder basic blocks within functions
+BOLT_ARGS += -reorder-blocks=ext-tsp
+# Reorder functions within the binary
+BOLT_ARGS += -reorder-functions=cdsort
+# Split function code into hot and code regions
+BOLT_ARGS += -split-functions
+# Split as many basic blocks as possible
+BOLT_ARGS += -split-all-cold
+# Move jump tables to a separate section
+BOLT_ARGS += -jump-tables=move
+# Use regular size pages for code alignment
+BOLT_ARGS += -no-huge-pages
+# Fold functions with identical code
+BOLT_ARGS += -icf=1
+# Split using best available strategy (three-way splitting, Cache-Directed Sort)
+# Disabled for libjulia-internal till https://github.com/llvm/llvm-project/issues/89508 is fixed
+# BOLT_ARGS += -split-strategy=cdsplit
+# Update DWARF debug info in the final binary
+BOLT_ARGS += -update-debug-sections
+# Print optimization statistics
+BOLT_ARGS += -dyno-stats
+# BOLT doesn't fully support computed gotos, https://github.com/llvm/llvm-project/issues/89117
+# Use escaped regex as the name BOLT recognises is often a bit different, e.g. apply_cl/1(*2)
+# This doesn't actually seem to do anything, the actual mitigation is not using --use-old-text
+# which we do in the bolt target
+BOLT_ARGS += -skip-funcs=.\*apply_cl.\*
+
+# -fno-reorder-blocks-and-partition is needed on gcc >= 8.
+BOLT_FLAGS := $\
+	"BOLT_CFLAGS_GCC+=-fno-reorder-blocks-and-partition" $\
+	"BOLT_LDFLAGS=-Wl,--emit-relocs"
+
+STAGE0_BUILD:=$(CURDIR)/toolchain
+STAGE1_BUILD:=$(CURDIR)/pgo-instrumented.build
+STAGE2_BUILD:=$(CURDIR)/optimized.build
+
+STAGE0_BINARIES:=$(STAGE0_BUILD)/usr/bin/
+STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/
+
+BOLT_PROFILE_DIR:=$(CURDIR)/profiles-bolt
+PGO_PROFILE_DIR:=$(CURDIR)/profiles
+PGO_PROFILE_FILE:=$(PGO_PROFILE_DIR)/merged.prof
+PGO_PROFRAW_FILES:=$(wildcard $(PGO_PROFILE_DIR)/*.profraw)
+JULIA_ROOT:=$(CURDIR)/../..
+
+LLVM_BOLT:=$(STAGE0_BINARIES)llvm-bolt
+LLVM_MERGEFDATA:=$(STAGE0_BINARIES)merge-fdata
+LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt
+LLVM_PROFDATA:=$(STAGE0_TOOLS)llvm-profdata
+LLVM_OBJCOPY:=$(STAGE0_TOOLS)llvm-objcopy
+
+# If you add new files to optimize, you need to add BOLT_LDFLAGS and BOLT_CFLAGS to the build of your new file.
+SYMLINKS_TO_OPTIMIZE := libLLVM.so libjulia-internal.so libjulia-codegen.so
+FILES_TO_OPTIMIZE := $(shell for file in $(SYMLINKS_TO_OPTIMIZE); do readlink $(STAGE1_BUILD)/usr/lib/$$file; done)
+
+AFTER_INSTRUMENT_MESSAGE:='Run `make finish_stage2` to finish off the build. $\
+	You can now optionally collect more profiling data by running Julia with an appropriate workload, $\
+	if you wish, run `make clean_profiles` before doing so to remove any profiling data generated by `make finish_stage2`. $\
+	You should end up with some data in $(BOLT_PROFILE_DIR). Afterwards run `make merge_data && make bolt`.'
+
+# When building a single libLLVM.so we need to increase -vp-counters-per-site
+# significantly
+COUNTERS_PER_SITE:=6
+# Note: profile counters are not atomic by default, https://discourse.llvm.org/t/profile-guided-optimization-pgo-related-questions-and-suggestions/75232/5
+
+AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for use in PGO by running Julia $\
+	with an appropriate workload. If you wish, run `make clean_profiles` before doing so to remove any profiling data $\
+	generated by building Julia. You should end up with about 15MB of data in $(PGO_PROFILE_DIR). $\
+	Note that running extensive scripts may result in counter overflows, which can be detected by running $\
+	`make top`. Afterwards run `make stage2`.'
+
+TOOLCHAIN_FLAGS = $\
+	"CC=$(STAGE0_TOOLS)clang" $\
+	"CXX=$(STAGE0_TOOLS)clang++" $\
+	"LD=$(STAGE0_TOOLS)ld.lld" $\
+	"AR=$(STAGE0_TOOLS)llvm-ar" $\
+	"RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\
+	"CFLAGS+=$(PGO_CFLAGS)" $\
+	"CXXFLAGS+=$(PGO_CXXFLAGS)" $\
+	"LDFLAGS+=-fuse-ld=lld $(PGO_LDFLAGS)"
+
+$(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):
+	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
+
+stage0: export USE_BINARYBUILDER_LLVM=1
+stage0: | $(STAGE0_BUILD)
+	# Turn [cd]tors into init/fini_array sections in libclang_rt, since lld
+	# doesn't do that, and otherwise the profile constructor is not executed
+	$(MAKE) -C $(STAGE0_BUILD)/deps install-clang install-llvm install-lld install-llvm-tools install-BOLT && \
+	find $< -name 'libclang_rt.profile-*.a' -exec $(LLVM_OBJCOPY) --rename-section .ctors=.init_array --rename-section .dtors=.fini_array {} + && \
+	touch $@
+
+$(STAGE1_BUILD): stage0
+stage1: PGO_CFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PGO_PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
+stage1: PGO_LDFLAGS:=-flto=thin -fprofile-generate=$(PGO_PROFILE_DIR)
+stage1: export USE_BINARYBUILDER_LLVM=0
+stage1: | $(STAGE1_BUILD)
+	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo $(AFTER_STAGE1_MESSAGE)
+
+stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe
+stage2: export USE_BINARYBUILDER_LLVM=0
+stage2: $(PGO_PROFILE_FILE) | $(STAGE2_BUILD)
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) $(BOLT_FLAGS) julia-src-release julia-symlink julia-libccalltest \
+								julia-libccalllazyfoo julia-libccalllazybar julia-libllvmcalltest && \
+	touch $@
+
+copy_originals: stage2
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		cp $$abs_file "$$abs_file.original"; \
+	done && \
+	touch $@
+
+# I don't think there's any particular reason to have -no-huge-pages here, perhaps slightly more accurate profile data
+# as the final build uses -no-huge-pages
+bolt_instrument: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -o $$abs_file --instrument --instrumentation-file-append-pid --instrumentation-file="$(BOLT_PROFILE_DIR)/$$file-prof" -no-huge-pages; \
+		mkdir -p $$(dirname "$(BOLT_PROFILE_DIR)/$$file-prof"); \
+		printf "\n"; \
+	done && \
+	touch $@
+	@echo $(AFTER_INSTRUMENT_MESSAGE)
+
+# We don't want to rebuild julia-src as then we lose the bolt instrumentation
+# So we have to manually build the sysimage and package image
+finish_stage2: PGO_CFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+finish_stage2: PGO_CXXFLAGS:=-fprofile-use=$(PGO_PROFILE_FILE)
+finish_stage2: PGO_LDFLAGS:=-flto=thin -fprofile-use=$(PGO_PROFILE_FILE) -Wl,--icf=safe
+finish_stage2: stage2
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) julia-base-cache && \
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) -f sysimage.mk sysimg-release && \
+	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) -f pkgimage.mk release
+
+merge_data: bolt_instrument
+	for file in $(FILES_TO_OPTIMIZE); do \
+		profiles=$(BOLT_PROFILE_DIR)/$$file-prof.*.fdata; \
+		$(LLVM_MERGEFDATA) $$profiles > "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata"; \
+	done && \
+	touch $@
+
+# The --use-old-text saves about 16 MiB of libLLVM.so size.
+# However, the rust folk found it succeeds very non-deterministically for them.
+# It tries to reuse old text segments to reduce binary size
+# BOLT doesn't fully support computed gotos https://github.com/llvm/llvm-project/issues/89117, so we cannot use --use-old-text on libjulia-internal
+# That flag saves less than 1 MiB for libjulia-internal so oh well.
+bolt: merge_data
+	for file in $(FILES_TO_OPTIMIZE); do \
+        abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		$(LLVM_BOLT) "$$abs_file.original" -data "$(BOLT_PROFILE_DIR)/$$file-prof.merged.fdata" -o $$abs_file $(BOLT_ARGS) $$(if [ "$$file" != $(shell readlink $(STAGE2_BUILD)/usr/lib/libjulia-internal.so) ]; then echo "--use-old-text -split-strategy=cdsplit"; fi); \
+    done && \
+    touch $@
+
+clean_profiles:
+	rm -rf $(PGO_PROFILE_DIR) $(BOLT_PROFILE_DIR)
+
+clean:
+	rm -f stage0 stage1 stage2 $(PGO_PROFILE_FILE) bolt copy_originals merge_data bolt_instrument
+
+restore_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		cp -P "$$abs_file.original" $$abs_file; \
+	done
+
+delete_originals: copy_originals
+	for file in $(FILES_TO_OPTIMIZE); do \
+		abs_file=$(STAGE2_BUILD)/usr/lib/$$file; \
+		rm "$$abs_file.original"; \
+	done
+
+$(PGO_PROFILE_FILE): stage1 $(PGO_PROFRAW_FILES)
+	$(LLVM_PROFDATA) merge -output=$@ $(PGO_PROFRAW_FILES)
+
+# show top 50 functions
+top: $(PGO_PROFILE_FILE)
+	$(LLVM_PROFDATA) show --topn=50 $< | $(LLVM_CXXFILT)
diff --git a/contrib/pgo-lto-bolt/README.md b/contrib/pgo-lto-bolt/README.md
new file mode 100644
index 0000000000000..ab574907c292f
--- /dev/null
+++ b/contrib/pgo-lto-bolt/README.md
@@ -0,0 +1,18 @@
+BOLT only works on x86_64 and arch64 on Linux.
+
+DO NOT STRIP THE RESULTING .so FILES, https://github.com/llvm/llvm-project/issues/56738.
+If you really need to, try adding `-use-gnu-stack` to `BOLT_ARGS`.
+
+To build a PGO+LTO+BOLT version of Julia run the following commands (`cd` into this directory first)
+```bash
+make stage1
+make stage2
+make copy_originals
+make bolt_instrument
+make finish_stage2
+make merge_data
+make bolt
+```
+After these commands finish, the optimized version of Julia will be built in the `optimized.build` directory.
+
+This doesn't align the code to support huge pages as it doesn't seem that we do that currently, this decreases the size of the .so files by 2-4mb.
diff --git a/contrib/pgo-lto/Makefile b/contrib/pgo-lto/Makefile
index 896d41ac2e286..ddd86f5d5b39a 100644
--- a/contrib/pgo-lto/Makefile
+++ b/contrib/pgo-lto/Makefile
@@ -8,7 +8,6 @@ STAGE0_TOOLS:=$(STAGE0_BUILD)/usr/tools/
 
 PROFILE_DIR:=$(CURDIR)/profiles
 PROFILE_FILE:=$(PROFILE_DIR)/merged.prof
-PROFRAW_FILES:=$(wildcard $(PROFILE_DIR)/*.profraw)
 JULIA_ROOT:=$(CURDIR)/../..
 
 LLVM_CXXFILT:=$(STAGE0_TOOLS)llvm-cxxfilt
@@ -26,15 +25,16 @@ AFTER_STAGE1_MESSAGE:='You can now optionally collect more profiling data for us
 	Note that running extensive scripts may result in counter overflows, which can be detected by running $\
 	`make top`. Afterwards run `make stage2`.'
 
-TOOLCHAIN_FLAGS = $\
-	"CC=$(STAGE0_TOOLS)clang" $\
-	"CXX=$(STAGE0_TOOLS)clang++" $\
-	"LD=$(STAGE0_TOOLS)ld.lld" $\
-	"AR=$(STAGE0_TOOLS)llvm-ar" $\
-	"RANLIB=$(STAGE0_TOOLS)llvm-ranlib" $\
-	"CFLAGS+=$(PGO_CFLAGS)" $\
-	"CXXFLAGS+=$(PGO_CXXFLAGS)" $\
-	"LDFLAGS+=$(PGO_LDFLAGS)"
+STAGE1_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-generate=$(PROFILE_DIR)" $\
+			CFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)" $\
+			CXXFLAGS="-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)"
+STAGE2_FLAGS:=LDFLAGS="-fuse-ld=lld -flto=thin -Wl,--undefined-version -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe" $\
+			CFLAGS="-fprofile-use=$(PROFILE_FILE)" $\
+			CXXFLAGS="-fprofile-use=$(PROFILE_FILE)"
+
+COMMON_FLAGS:=USECLANG=1 USE_BINARYBUILDER_LLVM=0
+
+all: stage2 # Default target as first in file
 
 $(STAGE0_BUILD) $(STAGE1_BUILD) $(STAGE2_BUILD):
 	$(MAKE) -C $(JULIA_ROOT) O=$@ configure
@@ -48,26 +48,20 @@ stage0: | $(STAGE0_BUILD)
 	touch $@
 
 $(STAGE1_BUILD): stage0
-stage1: PGO_CFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
-stage1: PGO_CXXFLAGS:=-fprofile-generate=$(PROFILE_DIR) -Xclang -mllvm -Xclang -vp-counters-per-site=$(COUNTERS_PER_SITE)
-stage1: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-generate=$(PROFILE_DIR)
-stage1: export USE_BINARYBUILDER_LLVM=0
 stage1: | $(STAGE1_BUILD)
-	$(MAKE) -C $(STAGE1_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo "--- Build Julia Stage 1 - with instrumentation"
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE1_FLAGS) $(MAKE) -C $(STAGE1_BUILD) $(COMMON_FLAGS) && touch $@
 	@echo $(AFTER_STAGE1_MESSAGE)
 
-stage2: PGO_CFLAGS:=-fprofile-use=$(PROFILE_FILE)
-stage2: PGO_CXXFLAGS:=-fprofile-use=$(PROFILE_FILE)
-stage2: PGO_LDFLAGS:=-fuse-ld=lld -flto=thin -fprofile-use=$(PROFILE_FILE) -Wl,--icf=safe
-stage2: export USE_BINARYBUILDER_LLVM=0
 stage2: $(PROFILE_FILE) | $(STAGE2_BUILD)
-	$(MAKE) -C $(STAGE2_BUILD) $(TOOLCHAIN_FLAGS) && touch $@
+	@echo "--- Build Julia Stage 2 - PGO + LTO optimised"
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) && touch $@
 
-install: stage2
-	$(MAKE) -C $(STAGE2_BUILD) USE_BINARYBUILDER_LLVM=0 install
+.DEFAULT: stage2
+	PATH=$(STAGE0_TOOLS):$$PATH $(STAGE2_FLAGS) $(MAKE) -C $(STAGE2_BUILD) $(COMMON_FLAGS) $@
 
-$(PROFILE_FILE): stage1 $(PROFRAW_FILES)
-	$(LLVM_PROFDATA) merge -output=$@ $(PROFRAW_FILES)
+$(PROFILE_FILE): stage1 $(wildcard $(PROFILE_DIR)/*.profraw)
+	$(LLVM_PROFDATA) merge -output=$@ $(PROFILE_DIR)/*.profraw
 
 # show top 50 functions
 top: $(PROFILE_FILE)
diff --git a/deps/BOLT.mk b/deps/BOLT.mk
new file mode 100644
index 0000000000000..34391ab10f716
--- /dev/null
+++ b/deps/BOLT.mk
@@ -0,0 +1,118 @@
+## BOLT ##
+include $(SRCDIR)/BOLT.version
+
+ifneq ($(USE_BINARYBUILDER_BOLT), 1)
+BOLT_GIT_URL:=https://github.com/llvm/llvm-project.git
+BOLT_TAR_URL=https://api.github.com/repos/llvm/llvm-project/tarball/$1
+$(eval $(call git-external,BOLT,BOLT,CMakeLists.txt,,$(SRCCACHE)))
+
+BOLT_BUILDDIR := $(BUILDDIR)/$(BOLT_SRC_DIR)/build
+
+LLVM_ENABLE_PROJECTS := bolt
+
+LLVM_CFLAGS :=
+LLVM_CXXFLAGS :=
+LLVM_CPPFLAGS :=
+LLVM_LDFLAGS :=
+LLVM_CMAKE :=
+
+LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)"
+
+# Otherwise LLVM will translate \\ to / on mingw
+LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False
+
+# Allow adding LLVM specific flags
+LLVM_CFLAGS += $(CFLAGS)
+LLVM_CXXFLAGS += $(CXXFLAGS)
+LLVM_CXXFLAGS += $(LLVM_CXXFLAGS)
+LLVM_CPPFLAGS += $(CPPFLAGS)
+LLVM_LDFLAGS += $(LDFLAGS)
+LLVM_LDFLAGS += $(LLVM_LDFLAGS)
+LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING=host -DCMAKE_BUILD_TYPE=Release
+LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)"
+
+LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off
+
+ifeq ($(OS), WINNT)
+LLVM_CPPFLAGS += -D__USING_SJLJ_EXCEPTIONS__ -D__CRT__NO_INLINE
+endif # OS == WINNT
+ifneq ($(HOSTCC),$(CC))
+LLVM_CMAKE += -DCROSS_TOOLCHAIN_FLAGS_NATIVE="-DCMAKE_C_COMPILER=$$(which $(HOSTCC));-DCMAKE_CXX_COMPILER=$$(which $(HOSTCXX))"
+
+# Defaults to off when crosscompiling, starting from LLVM 18
+LLVM_CMAKE += -DBOLT_ENABLE_RUNTIME=ON
+endif
+ifeq ($(OS), emscripten)
+LLVM_CMAKE += -DCMAKE_TOOLCHAIN_FILE=$(EMSCRIPTEN)/cmake/Modules/Platform/Emscripten.cmake -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_ENABLE_THREADS=OFF -DLLVM_BUILD_UTILS=OFF
+endif # OS == emscripten
+
+ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
+ifeq (${USECLANG},0)
+LLVM_CXXFLAGS += -mminimal-toc
+endif
+endif
+
+ifeq ($(fPIC),)
+LLVM_CMAKE += -DLLVM_ENABLE_PIC=OFF
+endif
+
+LLVM_CMAKE += -DCMAKE_C_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CFLAGS)" \
+	-DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS)"
+ifeq ($(OS),Darwin)
+# Explicitly use the default for -mmacosx-version-min=10.9 and later
+LLVM_CMAKE += -DLLVM_ENABLE_LIBCXX=ON
+endif
+
+LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
+	-DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS)"
+
+ifeq ($(USE_SYSTEM_ZLIB), 0)
+$(BOLT_BUILDDIR)/build-configured: | $(build_prefix)/manifest/zlib
+endif
+
+$(BOLT_BUILDDIR)/build-configured: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+		$(CMAKE) $(SRCCACHE)/$(BOLT_SRC_DIR)/llvm $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \
+		|| { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; }
+	echo 1 > $@
+
+$(BOLT_BUILDDIR)/build-compiled: $(BOLT_BUILDDIR)/build-configured
+	cd $(BOLT_BUILDDIR) && \
+		$(if $(filter $(CMAKE_GENERATOR),make), \
+		  $(MAKE), \
+		  $(CMAKE) --build . --target bolt)
+	echo 1 > $@
+
+$(BOLT_BUILDDIR)/build-checked: $(BOLT_BUILDDIR)/build-compiled
+ifeq ($(OS),$(BUILD_OS))
+	cd $(BOLT_BUILDDIR) && \
+		  $(CMAKE) --build . --target check-bolt
+endif
+	echo 1 > $@
+
+BOLT_INSTALL = \
+	cd $1 && mkdir -p $2$$(build_depsbindir) && \
+	$$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P tools/bolt/cmake_install.cmake
+
+$(eval $(call staged-install, \
+	bolt,$$(BOLT_SRC_DIR)/build, \
+	BOLT_INSTALL,,,))
+
+clean-bolt:
+	-rm -f $(BOLT_BUILDDIR)/build-configured $(BOLT_BUILDDIR)/build-compiled
+	-$(MAKE) -C $(BOLT_BUILDDIR) clean
+
+get-bolt: $(BOLT_SRC_FILE)
+extract-bolt: $(SRCCACHE)/$(BOLT_SRC_DIR)/source-extracted
+configure-bolt: $(BOLT_BUILDDIR)/build-configured
+compile-bolt: $(BOLT_BUILDDIR)/build-compiled
+fastcheck-bolt: #none
+check-bolt: $(BOLT_BUILDDIR)/build-checked
+
+else # USE_BINARYBUILDER_BOLT
+
+$(eval $(call bb-install,BOLT,BOLT,false,true))
+
+endif # USE_BINARYBUILDER_BOLT
diff --git a/deps/BOLT.version b/deps/BOLT.version
new file mode 100644
index 0000000000000..6a785041e163f
--- /dev/null
+++ b/deps/BOLT.version
@@ -0,0 +1,11 @@
+# -*- makefile -*-
+
+BOLT_VER := 18.1.4
+BOLT_JLL_VER := 18.1.4+0
+
+## jll artifact
+BOLT_JLL_NAME := BOLT
+
+## source build
+BOLT_BRANCH=llvmorg-$(BOLT_VER)
+BOLT_SHA1=e6c3289804a67ea0bb6a86fadbe454dd93b8d855
diff --git a/deps/Makefile b/deps/Makefile
index a6e5bce0d4e04..b87a3e1e58609 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -171,6 +171,13 @@ ifeq ($(WITH_ITTAPI),1)
 DEP_LIBS += ittapi
 endif
 
+ifeq ($(WITH_NVTX),1)
+DEP_LIBS += nvtx
+endif
+
+ifneq ($(WITH_TERMINFO),0)
+DEP_LIBS += terminfo
+endif
 
 # Only compile standalone LAPACK if we are not using OpenBLAS.
 # OpenBLAS otherwise compiles LAPACK as part of its build.
@@ -194,7 +201,8 @@ DEP_LIBS_STAGED := $(DEP_LIBS)
 DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
 	openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \
 	objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \
-	sanitizers libsuitesparse lld libtracyclient ittapi JuliaSyntax
+	sanitizers libsuitesparse lld libtracyclient ittapi nvtx JuliaSyntax \
+	terminfo
 DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL)
 
 ifneq ($(USE_BINARYBUILDER_OPENBLAS),0)
@@ -228,9 +236,11 @@ distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL))
 	rm -rf $(build_prefix)
 getall: $(addprefix get-, $(DEP_LIBS_ALL))
 
+include $(SRCDIR)/BOLT.mk
 include $(SRCDIR)/csl.mk
 include $(SRCDIR)/sanitizers.mk
 include $(SRCDIR)/ittapi.mk
+include $(SRCDIR)/nvtx.mk
 include $(SRCDIR)/llvm.mk
 include $(SRCDIR)/libuv.mk
 include $(SRCDIR)/pcre.mk
@@ -254,6 +264,7 @@ include $(SRCDIR)/libgit2.mk
 include $(SRCDIR)/libwhich.mk
 include $(SRCDIR)/p7zip.mk
 include $(SRCDIR)/libtracyclient.mk
+include $(SRCDIR)/terminfo.mk
 
 # vendored Julia libs
 include $(SRCDIR)/JuliaSyntax.mk
diff --git a/deps/blastrampoline.mk b/deps/blastrampoline.mk
index bd1cb65c6ae2d..cfa28a4d8b88f 100644
--- a/deps/blastrampoline.mk
+++ b/deps/blastrampoline.mk
@@ -16,16 +16,16 @@ $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured: $(BUILDDIR)/$(BLASTRAMPO
 BLASTRAMPOLINE_BUILD_ROOT := $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/src
 $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured
 	cd $(dir $@)/src && $(MAKE) $(BLASTRAMPOLINE_BUILD_OPTS)
-ifeq ($(OS), WINNT)
-	# Windows doesn't like soft link, use hard link
-	cd $(BLASTRAMPOLINE_BUILD_ROOT)/build/ && \
-		cp -f --dereference --link libblastrampoline.dll libblastrampoline.dll
-endif
 	echo 1 > $@
 
 define BLASTRAMPOLINE_INSTALL
 	$(MAKE) -C $(BLASTRAMPOLINE_BUILD_ROOT) install $(BLASTRAMPOLINE_BUILD_OPTS) DESTDIR="$2"
 endef
+ifeq ($(OS), WINNT)
+# Windows doesn't like soft link, use hard link to copy file without version suffix
+BLASTRAMPOLINE_INSTALL += && cd $2$$(build_prefix)/bin && \
+$$(WIN_MAKE_HARD_LINK) libblastrampoline-*.dll libblastrampoline.dll
+endif
 $(eval $(call staged-install, \
 	blastrampoline,$(BLASTRAMPOLINE_SRC_DIR), \
 	BLASTRAMPOLINE_INSTALL,, \
diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version
index 871053db3c9f2..fd055e1ae8120 100644
--- a/deps/blastrampoline.version
+++ b/deps/blastrampoline.version
@@ -2,6 +2,6 @@
 BLASTRAMPOLINE_JLL_NAME := libblastrampoline
 
 ## source build
-BLASTRAMPOLINE_VER := 5.10.1
-BLASTRAMPOLINE_BRANCH=v5.10.1
-BLASTRAMPOLINE_SHA1=ff05ebb4e450deda0aebe8dce4d4f054e23fecfc
+BLASTRAMPOLINE_VER := 5.11.0
+BLASTRAMPOLINE_BRANCH=v5.11.0
+BLASTRAMPOLINE_SHA1=05083d50611b5538df69706f0a952d8e642b0b4b
diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5
new file mode 100644
index 0000000000000..62e63ff3174d6
--- /dev/null
+++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/md5
@@ -0,0 +1 @@
+c12540d5889cef05bc87183a4ce5a54c
diff --git a/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512
new file mode 100644
index 0000000000000..0635e180ac9a5
--- /dev/null
+++ b/deps/checksums/BOLT.v18.1.4+0.x86_64-linux-gnu-cxx11.tar.gz/sha512
@@ -0,0 +1 @@
+61cc7cc42b925f37502eed0d31eafadbfdc24a9ebc892c9b8d96a27b004cbccf2e5da7face5c8d9c9db57fac1b5cf662d890a67337436c5d4aa3373256638ab1
diff --git a/deps/checksums/Downloads-89d3c7dded535a77551e763a437a6d31e4d9bf84.tar.gz/md5 b/deps/checksums/Downloads-89d3c7dded535a77551e763a437a6d31e4d9bf84.tar.gz/md5
new file mode 100644
index 0000000000000..611f3dd448d98
--- /dev/null
+++ b/deps/checksums/Downloads-89d3c7dded535a77551e763a437a6d31e4d9bf84.tar.gz/md5
@@ -0,0 +1 @@
+2472bd6434d21c4b3e3199437e6fdcf7
diff --git a/deps/checksums/Downloads-89d3c7dded535a77551e763a437a6d31e4d9bf84.tar.gz/sha512 b/deps/checksums/Downloads-89d3c7dded535a77551e763a437a6d31e4d9bf84.tar.gz/sha512
new file mode 100644
index 0000000000000..6937982e838f3
--- /dev/null
+++ b/deps/checksums/Downloads-89d3c7dded535a77551e763a437a6d31e4d9bf84.tar.gz/sha512
@@ -0,0 +1 @@
+0a3fa9a09de81aa9676dbc7448408c7503f45e42519a2667540ad890316c7da089c95de5464a2032171f963c6f3cba73d6b3c246f1c7ac6ede283fc8132d5209
diff --git a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/md5 b/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/md5
deleted file mode 100644
index fc3bce951cafb..0000000000000
--- a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-97bb33510fadec7f4cc4c718c739e9a0
diff --git a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/sha512 b/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/sha512
deleted file mode 100644
index bf2821e8252b0..0000000000000
--- a/deps/checksums/Downloads-a9d274ff6588cc5dbfa90e908ee34c2408bab84a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a362aaf762f42deebb8632a7a7980cd22b2777e8c4dc629e418580269e24a64217ad846d61acad70438cfdc190e47ba2ff7716edd4e04d8d10c1d765efce604d
diff --git a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/md5 b/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/md5
deleted file mode 100644
index 5e99f7453cfe2..0000000000000
--- a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-08230d0801fda3c81927d558452215e4
diff --git a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/sha512 b/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/sha512
deleted file mode 100644
index 16d15cdef3104..0000000000000
--- a/deps/checksums/JuliaSyntaxHighlighting-4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0386841dcf30ee53f7f95dd3206e1208482507d157aa09739873de2a56e5ca3d7bbf27eccd9f4ed81c1c0fea229673475f6454fe94df0ff960563ca4c29ed36c
diff --git a/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/md5 b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/md5
new file mode 100644
index 0000000000000..cbcb8097d1673
--- /dev/null
+++ b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/md5
@@ -0,0 +1 @@
+3dc1387ed88ba3c0df04d05a86d804d0
diff --git a/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/sha512 b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/sha512
new file mode 100644
index 0000000000000..2e58061d16058
--- /dev/null
+++ b/deps/checksums/JuliaSyntaxHighlighting-b89dd99db56700c47434df6106b6c6afd1c9ed01.tar.gz/sha512
@@ -0,0 +1 @@
+fe30ed73b257e6928097cb7baca5b82a9a60b2f9b9f219fbcf570c5ed513447f0fda2a48da06b57e381516a69278f7f8519764d00e9e4fb5683a5411e245ef45
diff --git a/deps/checksums/Pkg-046df8ce407659cfaccc647265a6e57bfb02e056.tar.gz/md5 b/deps/checksums/Pkg-046df8ce407659cfaccc647265a6e57bfb02e056.tar.gz/md5
deleted file mode 100644
index 099cd0640b2e2..0000000000000
--- a/deps/checksums/Pkg-046df8ce407659cfaccc647265a6e57bfb02e056.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4dd51346715552c50b186d26a01a793c
diff --git a/deps/checksums/Pkg-046df8ce407659cfaccc647265a6e57bfb02e056.tar.gz/sha512 b/deps/checksums/Pkg-046df8ce407659cfaccc647265a6e57bfb02e056.tar.gz/sha512
deleted file mode 100644
index ba39942f95ecc..0000000000000
--- a/deps/checksums/Pkg-046df8ce407659cfaccc647265a6e57bfb02e056.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-27fa365879bc42d1e9e41c077a2826350994de59a93f72c276b4b2a2b21495ffc0785b3f5c72ac054f7b6d8765b5e51f36792af2661e4989c422159cec600144
diff --git a/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5 b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5
new file mode 100644
index 0000000000000..b5b82565470c0
--- /dev/null
+++ b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/md5
@@ -0,0 +1 @@
+88b8a25a8d465ac8cc94d13bc5f51707
diff --git a/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512 b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512
new file mode 100644
index 0000000000000..a746b269d91f0
--- /dev/null
+++ b/deps/checksums/Pkg-51d4910c114a863d888659cb8962c1e161b2a421.tar.gz/sha512
@@ -0,0 +1 @@
+22262687f3bf75292ab0170e19a9c4a494022a653b2811443b8c52bc099dee0fddd09f6632ae42b3193adf3b0693ddcb6679b5d91e50a500f65261df5b7ced7d
diff --git a/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/md5 b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/md5
new file mode 100644
index 0000000000000..7182cc71f7b35
--- /dev/null
+++ b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/md5
@@ -0,0 +1 @@
+2db86c7030acc973d5b46a87f32f7e99
diff --git a/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/sha512 b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/sha512
new file mode 100644
index 0000000000000..a9e18eac9bfaa
--- /dev/null
+++ b/deps/checksums/SparseArrays-0dd8d45d55b305458d0d3d3451057589b684f72f.tar.gz/sha512
@@ -0,0 +1 @@
+0d3f54e7e75b48966e1816608d6ddf62175b92a0c778813a562df20750c6ecef9e4ccc24f9f3fffe4051d4b6765332add8c289fcdc598c320f400cec57a223a3
diff --git a/deps/checksums/SparseArrays-e61663ad0a79a48906b0b12d53506e731a614ab8.tar.gz/md5 b/deps/checksums/SparseArrays-e61663ad0a79a48906b0b12d53506e731a614ab8.tar.gz/md5
deleted file mode 100644
index d35cbc567faec..0000000000000
--- a/deps/checksums/SparseArrays-e61663ad0a79a48906b0b12d53506e731a614ab8.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-19f6d3bcbeec7a123e8dde983ef66a9a
diff --git a/deps/checksums/SparseArrays-e61663ad0a79a48906b0b12d53506e731a614ab8.tar.gz/sha512 b/deps/checksums/SparseArrays-e61663ad0a79a48906b0b12d53506e731a614ab8.tar.gz/sha512
deleted file mode 100644
index f2c8db80327ce..0000000000000
--- a/deps/checksums/SparseArrays-e61663ad0a79a48906b0b12d53506e731a614ab8.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8cef45d83047eba97edcaed04bb49f5aabdd96ec951baaa772c7da0402259e9578cfa383ab882440f685338ed14f797afe776a14e6aeea9df2428aa1592fcabf
diff --git a/deps/checksums/StyledStrings-d7496d24d3f05536bce6a7eb4cd8ca05a75c02aa.tar.gz/md5 b/deps/checksums/StyledStrings-d7496d24d3f05536bce6a7eb4cd8ca05a75c02aa.tar.gz/md5
deleted file mode 100644
index 3a5fccdec0fba..0000000000000
--- a/deps/checksums/StyledStrings-d7496d24d3f05536bce6a7eb4cd8ca05a75c02aa.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a02cd2c8bedd83b74917cf3821c89f46
diff --git a/deps/checksums/StyledStrings-d7496d24d3f05536bce6a7eb4cd8ca05a75c02aa.tar.gz/sha512 b/deps/checksums/StyledStrings-d7496d24d3f05536bce6a7eb4cd8ca05a75c02aa.tar.gz/sha512
deleted file mode 100644
index a042e4f306275..0000000000000
--- a/deps/checksums/StyledStrings-d7496d24d3f05536bce6a7eb4cd8ca05a75c02aa.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2e86daa832533f0369e66e359d7d8f47002f93525f83233c809007a13dfd05a201bcd273b3cb4f3eba2586e98cc9afa43c242f67dc18b91fc898d98a0bd8fde9
diff --git a/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/md5 b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/md5
new file mode 100644
index 0000000000000..0d39747d275ba
--- /dev/null
+++ b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/md5
@@ -0,0 +1 @@
+bf7c157df6084942b794fbe5b768a643
diff --git a/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/sha512 b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/sha512
new file mode 100644
index 0000000000000..d0a8d6cec08cf
--- /dev/null
+++ b/deps/checksums/StyledStrings-f6035eb97b516862b16e36cab2ecc6ea8adc3d7c.tar.gz/sha512
@@ -0,0 +1 @@
+ba2f6b91494662208842dec580ea9410d8d6ba4e57315c72e872227f5e2f68cc970fcf5dbd9c8a03920f93b6adabdeaab738fff04f9ca7b5da5cd6b89759e7f6
diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5
new file mode 100644
index 0000000000000..2f81a0d9191b5
--- /dev/null
+++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5
@@ -0,0 +1 @@
+46541001073d1c3c85e18d910f8308f3
diff --git a/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512 b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512
new file mode 100644
index 0000000000000..e2eb44845e276
--- /dev/null
+++ b/deps/checksums/SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512
@@ -0,0 +1 @@
+f7470a447b934ca9315e216a07b97e363f11bc93186f9aa057b20b2d05092c58ae4f1b733de362de4a0730861c00be4ca5588d0b3ba65f018c1798b9122b9672
diff --git a/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5 b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5
new file mode 100644
index 0000000000000..3c7510a592760
--- /dev/null
+++ b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/md5
@@ -0,0 +1 @@
+1d606dfc60d2af892009213650169129
diff --git a/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512 b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512
new file mode 100644
index 0000000000000..ec49e695cbb3a
--- /dev/null
+++ b/deps/checksums/Tar-1114260f5c7a7b59441acadca2411fa227bb8a3b.tar.gz/sha512
@@ -0,0 +1 @@
+6e60d74d00ffc2e1a5a9c13f59b3e3fc4360e641b9f0e3e4797c8b524288e779397bd56a8e57f47d5a06d1e6f359c86917164ec7f6e0ac3d6e876dfa09d2b0c8
diff --git a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5 b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5
deleted file mode 100644
index 921ffb0a2561e..0000000000000
--- a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b3d21b3f38cd106e64fa9d058d095651
diff --git a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512 b/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512
deleted file mode 100644
index cbf6ad4952258..0000000000000
--- a/deps/checksums/Tar-81888a33704b233a2ad6f82f84456a1dd82c87f0.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-61bd3555de7a2cec265ae72d58b4635f84ec75b993b9dab2dc5be64375b6057972a2786337f90742ad3b91c57f5008372a3a4f8a5b589e2cf4d5cd1a8056e03c
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
index b8c4c68c661ba..edb8cadc74846 100644
--- a/deps/checksums/blastrampoline
+++ b/deps/checksums/blastrampoline
@@ -1,34 +1,34 @@
-blastrampoline-ff05ebb4e450deda0aebe8dce4d4f054e23fecfc.tar.gz/md5/48ec847f7a687dd36789d6365d3c5645
-blastrampoline-ff05ebb4e450deda0aebe8dce4d4f054e23fecfc.tar.gz/sha512/85f6a46e7fe5f76ff8cef5776dad73b17eb97be3b16ca1af961cf2c2cbe125c629bd808b0243b793e4235dcb545a02cc082eaf14b3a438f3e0973d46921550a3
-libblastrampoline.v5.10.1+0.aarch64-apple-darwin.tar.gz/md5/cbbb4b5a6ebee04d686f072a69e855be
-libblastrampoline.v5.10.1+0.aarch64-apple-darwin.tar.gz/sha512/32eaebb0fa3c0bc85a270b5c13fecaaa86ee10b4cea04405672badbaaa5ae3f22757dc758d9d971c811dc100a8ebd72fa00391238c0227de3690341f0434842a
-libblastrampoline.v5.10.1+0.aarch64-linux-gnu.tar.gz/md5/da097a9459dcb8554f3d9511ea1a1c88
-libblastrampoline.v5.10.1+0.aarch64-linux-gnu.tar.gz/sha512/0159dbd4579d2a334f4341a64841bc1cef1354fc744709055339957b299b2b36b30162c2c90367abc04a2fb2f236aaa1fe6eb290393702f6fb97eaa79e4bb028
-libblastrampoline.v5.10.1+0.aarch64-linux-musl.tar.gz/md5/f32839481836dad6a1b159d9c33db752
-libblastrampoline.v5.10.1+0.aarch64-linux-musl.tar.gz/sha512/b973e739ab4af6ba93328943b03f16f02625553efc2375909b5e5bed4446287a21f99025817ce73267cac2d0b6b65f7dc2a5bd4b4c88d263b3c923b2ec3ad5c4
-libblastrampoline.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/md5/23eb2cbc1a547f94935fa4f9ffa2285b
-libblastrampoline.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/0681497bac1d8f3ff1932adbb9fdd0b710b2a28ca7f2f4bb0093ba1123b14acd8bcb062e81e538c6e51ed8449ffea582cdb5b610e97d0c76a6feb58545938a6b
-libblastrampoline.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/md5/4e5168b1ada4e36861aeb3f4a6ace318
-libblastrampoline.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/sha512/4ee663d2d3665e6ea356cfac60274c5f06ab08c1ee99b345ddda6872125663acb5559f704d0a918706e6cb075fc3071aaec4bcc3b9fee5fee72696e2f1454fb3
-libblastrampoline.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/md5/a28e3820fdf1435027f69204a553b5f9
-libblastrampoline.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/48edfc069aeaead468ffb6145986b11a040286990520b191e0f9cfa99f0b9458e6b17e523c8cc81889af7c9c2adce6372c65f2205a20c8e778614eaa06d288f9
-libblastrampoline.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/md5/c5ea1756f3d58f8a74e76958f3a93658
-libblastrampoline.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/sha512/f3eb003f954ffc346ae1037325b56fb2e4db9a6f88cc878862f921df79d8e0a5c8da9d229610dcd5d21c3d7af0a61ddcc0e70e32bf45fc9ea828d9ab2d1ddda8
-libblastrampoline.v5.10.1+0.i686-linux-gnu.tar.gz/md5/8bbdd602fed40577c4c9f020a8304c57
-libblastrampoline.v5.10.1+0.i686-linux-gnu.tar.gz/sha512/67947bd68c9f1131311d5d6a0fbcc92540f2fb2e1d2d0fa46951033fd75658661ba355c415b68de5dcd1bf0c440e27e3362ece70f5fd989fade796e9e723becd
-libblastrampoline.v5.10.1+0.i686-linux-musl.tar.gz/md5/455bb539e7646e060fa24fb59c82f2f0
-libblastrampoline.v5.10.1+0.i686-linux-musl.tar.gz/sha512/e392334512ebce93ea4b34265ead802c543db5678c30083fb0dce08c071dd7140a9532d3162f215815807650138ffec5ad5d6d848025ee3913dfe353308d8e57
-libblastrampoline.v5.10.1+0.i686-w64-mingw32.tar.gz/md5/9a1c6845cb2e85b3497cd01d3a89b06b
-libblastrampoline.v5.10.1+0.i686-w64-mingw32.tar.gz/sha512/66a9429a70575f4fd19d1cfb263c4c7801ac4a88408f98125f6e347b0ba35d2fdc4cbb82bf7407462beab1f7a7df2184163f76d5f2330f485bc1c7e5354716aa
-libblastrampoline.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/md5/b2b3eea1cfce87642a1f2afa125dcc5c
-libblastrampoline.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/sha512/43d5bf6535ad8f0910a523a3940787db956a3700681cc0dc1e2a1aabdaafa669e46e42854df29c0dcff06b3ade899159cb4845a48a6e618ba52af7276151fd0e
-libblastrampoline.v5.10.1+0.x86_64-apple-darwin.tar.gz/md5/497a8f88c810a12b3faf12851427c784
-libblastrampoline.v5.10.1+0.x86_64-apple-darwin.tar.gz/sha512/7e3ed2117c6248761ba5bc3fd339f12ca98050d163d5c3668a62ee90aec10858d30fe9d78cea01796c9b2231cdd4f9ad0ae886bf8e984cb24d745e9f8c0fd62b
-libblastrampoline.v5.10.1+0.x86_64-linux-gnu.tar.gz/md5/355612dc7c383dd860dc03498254814b
-libblastrampoline.v5.10.1+0.x86_64-linux-gnu.tar.gz/sha512/12d803c53f705dacf2bf5f3884bd9b40f89a248ebda8bce1da6bba0cfe4331222bed5124dc45ea377e7c0fcc2d0dc624cc71b0eb454319fd12e2fd4c58d265f7
-libblastrampoline.v5.10.1+0.x86_64-linux-musl.tar.gz/md5/78a09fe918b1b0b3dc72c17c2e62799b
-libblastrampoline.v5.10.1+0.x86_64-linux-musl.tar.gz/sha512/1ff3d7e8d36d450f430119b30e03a64f2d78d6d13a04e4a4b97c64966e341f486080c733dbd73ee3ed7c1557ad737f37c013335578e1555d162f0591929de747
-libblastrampoline.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/md5/ad9f213bc4a7882784ad09017fc82234
-libblastrampoline.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/sha512/4de6f08a45cb3c3819f71ccd44688b847c2e9b36e0d4bce94191558fe2d775c2790f4c68eea1a366d0a869f0c986aa33626d427946403cf4e128f45b5881f70e
-libblastrampoline.v5.10.1+0.x86_64-w64-mingw32.tar.gz/md5/2d0cf117d8d797e7716f8d836dfdd9f5
-libblastrampoline.v5.10.1+0.x86_64-w64-mingw32.tar.gz/sha512/d7a94f3a71400b22b6c14648455e38dff750eb88661928b66b307f721d53769dea3aec43bb86e2200145ed072475c32e1bfc38e0fc35445c4c42e5752754b0e5
+blastrampoline-05083d50611b5538df69706f0a952d8e642b0b4b.tar.gz/md5/700b22cb26291736bd1263cd2a7f2d75
+blastrampoline-05083d50611b5538df69706f0a952d8e642b0b4b.tar.gz/sha512/967c16d28834df112916c0904dd4c7231a1c5e4edf279adb26411faa17da28eee4680ce2347b3941520dccbc768944277a8f724b21976960d00f840349b90e36
+libblastrampoline.v5.11.0+0.aarch64-apple-darwin.tar.gz/md5/769458d40e004d6126cae6b34351068f
+libblastrampoline.v5.11.0+0.aarch64-apple-darwin.tar.gz/sha512/75a726b9a4f41b70344ceb9e1f1a7ad370bfa84ce44c70b8a965061d777871e3bf2237ae055da7e6202ddef78932ba8baf2a01a675b1b0cec5338ef16ea2081b
+libblastrampoline.v5.11.0+0.aarch64-linux-gnu.tar.gz/md5/d92cf3f3fa1e977ea3a1a74acc8442d1
+libblastrampoline.v5.11.0+0.aarch64-linux-gnu.tar.gz/sha512/3354f4eec2a410f81cc0546a04ce98ddd416d441c1701a59ec5bebea99af8823b5af10a85cb4e3377548422c6d6a0a870f2e7a05ad0cda52c6143361d59ba4fb
+libblastrampoline.v5.11.0+0.aarch64-linux-musl.tar.gz/md5/41d060c03202b662e47bda5fbf7b1e84
+libblastrampoline.v5.11.0+0.aarch64-linux-musl.tar.gz/sha512/54a05516e12350441c33341fde53bc912aa52dc4b746089c2d21cb75f24f0fb140849a520327db6f52895743eab090b59fa974a2a426a49f8b4e38693340a306
+libblastrampoline.v5.11.0+0.armv6l-linux-gnueabihf.tar.gz/md5/4930dceefac63e7aa5a93e1ba0e00e59
+libblastrampoline.v5.11.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/dafce083c2b409ead61fdbdf4f46b7c93cab00c82a74a181d381c4a93f1e7af035cd6caf407b0199c1f8c2f2f68f93d67938ef092fa4a8d1133f0ea73fb51a9c
+libblastrampoline.v5.11.0+0.armv6l-linux-musleabihf.tar.gz/md5/82346cc4ddeaa29ea7a081edfdfcb08b
+libblastrampoline.v5.11.0+0.armv6l-linux-musleabihf.tar.gz/sha512/72e387bd661096a46077e8c15e12f8a6f18fd6aaf30af0678d00eca0d83af10758874643f5716539dd38269e831e4649d45db739aeb60996bf1b96277cea1d17
+libblastrampoline.v5.11.0+0.armv7l-linux-gnueabihf.tar.gz/md5/7e8f115268e8c62acaa2a53ecd32e2fe
+libblastrampoline.v5.11.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/4210c306ff7ccb53aa6c9f45e134c63b238c563ed753f7536dfc21f6962dfea35d9de62e429e2685b70d0db780ac766b72fd5e76e2d62df74000e3e5d553c30f
+libblastrampoline.v5.11.0+0.armv7l-linux-musleabihf.tar.gz/md5/7f388611c477b528a091f697b0d334d9
+libblastrampoline.v5.11.0+0.armv7l-linux-musleabihf.tar.gz/sha512/e9b017dfa8c19cb940395b253f3b28511a6619469fabff7ab1671ed0936e9e0681d1385c3d1f5d6417ccb65ffbdcf53a0c8519d4ef8e89f9500a05ca00296144
+libblastrampoline.v5.11.0+0.i686-linux-gnu.tar.gz/md5/254948ea87a435251b1e064a77b3d635
+libblastrampoline.v5.11.0+0.i686-linux-gnu.tar.gz/sha512/5a51d3c20c49c497a8f0c2d2e7b38b49ec5e367c7013a7f0efa4fc099639da20ef9c0bfdbdfbdc40b27ce61f189b18f5cf617d7a0ed4bc5300da692f7d6b77a4
+libblastrampoline.v5.11.0+0.i686-linux-musl.tar.gz/md5/a9504870af8db1e247be02c5e188f7a5
+libblastrampoline.v5.11.0+0.i686-linux-musl.tar.gz/sha512/5f0109168a16edb8ca66fcf10c2c10b57fe9c3061c0b08dac4dea936538fa5854aa1b66079f127b5d9902288b61772054013256aa307b682de38e350b1bbb367
+libblastrampoline.v5.11.0+0.i686-w64-mingw32.tar.gz/md5/815822f6bacb42c35b80bc77458c5c49
+libblastrampoline.v5.11.0+0.i686-w64-mingw32.tar.gz/sha512/c82f8c6fe0b7917860e5601c79e35d56297c53b6f7f992841d4f048e7981533e459f9db0805a16d82a9e03d452489760def0d9c57181dcfa5dc363102180eecd
+libblastrampoline.v5.11.0+0.powerpc64le-linux-gnu.tar.gz/md5/ee30c9cb4c51df03026f9e471040e9cc
+libblastrampoline.v5.11.0+0.powerpc64le-linux-gnu.tar.gz/sha512/5055d83a1b0625364ddd97652a4c6fa39c795078123cad33a085283889274f66c9dc053be0591c14be262dc7eef666726afa922c66ae8d05c2791c3d6bd7009e
+libblastrampoline.v5.11.0+0.x86_64-apple-darwin.tar.gz/md5/210cd354c9b4a8aa2a2b55723597e58b
+libblastrampoline.v5.11.0+0.x86_64-apple-darwin.tar.gz/sha512/1ee65d598f9f8a2cf7137135c8c2c431520b1cde319fc33dddfbdae9fe01d986e979a97c24cf85c090cc40064cfe47c376dfeb088ff417d17868c4df84fb2fd4
+libblastrampoline.v5.11.0+0.x86_64-linux-gnu.tar.gz/md5/e2213c42eebee6e45079ef6831077b3f
+libblastrampoline.v5.11.0+0.x86_64-linux-gnu.tar.gz/sha512/ab2c3026d34962a2ca5116d71a4e8eaaca5182d53f21edd3e4be81ce26e74e427c88797308af7fbbf1b9ee615e0383acf0dae1d0eb207ebc64dddaf927f00b48
+libblastrampoline.v5.11.0+0.x86_64-linux-musl.tar.gz/md5/8cde3c618e882ea2b7c8a017a69175c7
+libblastrampoline.v5.11.0+0.x86_64-linux-musl.tar.gz/sha512/8a3aca5691c3936d114c804471b2429b9ae81308f020247765614d2f792f93a012263ce4baa31cf42f4dacc23a7161a4c7f9debfba8d9028879f1ed3fc4e2433
+libblastrampoline.v5.11.0+0.x86_64-unknown-freebsd.tar.gz/md5/b02eb694e1486ef8ffe9534ac2bd5ec6
+libblastrampoline.v5.11.0+0.x86_64-unknown-freebsd.tar.gz/sha512/989273809ae567d7e7193529740423ac1870eae3a0effeecc67f84da914d81649786f393e101f013b7232ef5fe35066d89b3cb776ad0e87394799491ef28a467
+libblastrampoline.v5.11.0+0.x86_64-w64-mingw32.tar.gz/md5/6e7f602ab0bf5a5c28bf4e959a1bbf77
+libblastrampoline.v5.11.0+0.x86_64-w64-mingw32.tar.gz/sha512/556e7ca1a2576c1d7825ac1bc2449ffe2cd40391cf316b10f60681a5c736939c97eb5221c2837640928b5544f89f44cb14ca44ccf54092376390ea1a6012c9e5
diff --git a/deps/checksums/clang b/deps/checksums/clang
index ee3dc2125ea30..7dc297db9c05b 100644
--- a/deps/checksums/clang
+++ b/deps/checksums/clang
@@ -1,108 +1,108 @@
-Clang.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/5a9351db0940c66e9646e0f3d6f37e1a
-Clang.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/bf344cfe91795cfc4419ea9ec50df99237b64c57e0b81655a957b15ecc5b16f0134daf189f18fe34274df8de679d407b36f82e3723e80428afb456215a5b9a20
-Clang.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/md5/6c7461a52e07a1e3ecf9911784bb26cb
-Clang.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/sha512/ce63de6405cd7c34d640afb259de8056db175e55bece923ce53c39b88dccc2885de70f4c598b3282102754b0c7cf6ac602e827968b6509fd7affa20ecf07d1a5
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/5446f22e4aba17482c057ee79beb2086
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/372be10dd8752821c751e571a5a9fc4af328285dcad6f2213f6e3d54f819405c26f1a1cb1e712d4bffebe3a42ca0736903d59ba70602a8ddd96b9e6fdfb9bacf
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/ca146aa3731ef24300c8398ddfb7ffd4
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/2459729a3af81fb962c7491ff16209fe6b65f4ef2f8323857b7c548da506d227a42a9b3301b8c8465cff66bbc9acbff2ac3e86d1a8560b9cb701b133317cd730
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/68c478b00a6cb1ecf700b54c86acc584
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/669af6e27ee67ea1be7f70cc328193d6139161264e1f6ef85c31c62523997246050d8b8aa241b257a191a9566df1f3ea620641c676908b817d5dfe7519ba0c8e
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/a299ea50c4862dcd0832cc483aa9c172
-Clang.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/e42e2e09be2872d2e1c57a46099b92847873dd4ebc87801cbd5fc171bbb236ef8bb815c4fd481d4a804fd6604fcb3cee7ee6fa66e712cc7a2fc2434d0649932a
-Clang.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/804ec7eeb6b2fd8fb6ad9537bebc0f3e
-Clang.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/36cda1267528e54658b28d31ed4218ecb11c2a05e161faf80030a899d2cb1d1ed145bdf19f7565853277230428d521a2b33759df09e0e799226b44d477c2de46
-Clang.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/4fd80844867e14a1a245c2ed911942bc
-Clang.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/acc6a4d240b54682425f40d8c5b6e3578d818b03430696c3c90cde8ad8474de5692468379cfc7f4d826bd44a6fb38cdc036df9d31833d09477fb69ae569e160c
-Clang.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/1774c2ed22a44aab72d7cf58c8a85ab0
-Clang.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/686b2bd8af2f47b03b25e23ac2d2954e5b9ccdcd942fcc6ccb9b96240a1fe414ac73d88f173dfdf93f45eb609e99299041a6a976a90b7afc6e49b698b6403a94
-Clang.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/e5e0d42647b5b50102f68e76823be11f
-Clang.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/72b112ab6714d67c33088c978420f996a45c6fd1900ab1725908792a5b8a0f0407ecf53817761059fa4bebd81a443680c8cd1c25633a2eb702986a23072d7b70
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/9601966aed6cdf5af9b6da24bf088ef8
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/c70448ed703f74c0f82c43ba35bf3a1f4f02832e42bea795b5ae0caae1e3b8aa0fdd914b3a6753a68b9f0a7d3a5156e56fad35bd6fff15bc5566f74db26ce3ca
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/3aa7b1d67c252aab4cfb85a665ecf194
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/7575858507e07b94001200c4d4e9875a2a4c8b97074655a1c1c03edd8cd2bbe853cca3a518d36d6f3243649839aa53d4dbe2e637eaf9b93873b2dd82266d4e17
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/7729c1ebdde07122fd0f9ae63a14c34f
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/04fe96e57a5351c3265441b73919900f1d19220747aa8ac061ef15531ee3f6bd62a70a4405c615c3b14975250b039d81fabde3b351c6e2118e684ca479eacbaa
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/37536e1d18b6b0a948b83ebcee530af0
-Clang.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/d94c3bd7afbca9dc81bb10a4d0e041165e63a2ac9dfcc1483bd55da1091c664074e9a26c9972da23adca3f711ffd22ba517546f0f0e58b717f3043973def0e97
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/ded5fe5d4de1c3bfd0fc75494c44cb11
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/85f6df75ed2b67fe9dcdea745ac1e659c0335c17c54b03c964d6996e926303fbf14c1c2ed6b369ffa38bde827c9894c32de82afa562673dad3268c43985dc7ce
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/e4eac3b4bfa0e3b825cd23f88738bd3a
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/c0552d1c9dbfe73b07f3ea2907d879c1142e8f6db6530297ee977c2b23858a9f55674f635a4746262e7595af2ca41a752a6abb4944e6707f4daa3a8c0715df83
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/4b5f66f39069204ba7281740115b7ef7
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/4c789017ec2bd98e808d198a8baefb2405222efb6e93eb5d1b8944dbd1e587afc41ebaf506b0aed5def5eb4c815ef4c822e0e0477b4aaac35fc03f9850853be3
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/95310b525b635b8c6550226e2b9fe02b
-Clang.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/faa62d41124b92d829658397eaada3ee8ce41f2e36e7f9954bd3fdd882a1232bf3431f36c9b7e65c17ae4f228da9ac37e1db0b1ae43a8540765186a230c65bab
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/5886d8e09d76ed74c2293c0af8be413f
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/ca66bb3bd39b7643cf0dcb551a6bb7a3293f4c99f8d4ae1fc16eb66a0f0da0ef10acae52169b2522dc2fdebc1f550d2d36b87bb25d9b1c9df0a8f0d5089c7642
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/7b08c562565e408d716898bf37e44eda
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/e9051454cc15a7879d90e6b36eeaf4c956e328be7823a1fa37cb98197c0fb4dddb9aaa8cf7aedd35e0affa9e6876b79f9a1160da1ec4d26ea7c775db58293dd2
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/dc816bd807c9d131e088c30caad9d326
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/5700dcef831b52abf64cc9120352918c302301e19ecf6ac64aa2cfb6270f7b2c82fe3f0f1d3281539081db7d520e2301995d992b9e8234cf64d7ec88126f4bc8
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/971caa23440c190b085300f4cd67b080
-Clang.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/5ce854fee306c14ef7964e3ab9831e816c6eefab637221b71be2187f42e7b089c1dd9e92754ab5ee3198bb3c0e84da9a2cc15c2d6afcb086f61b897cfc320ab3
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/5ad3e0a391db624713263226259b55f0
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/ed4cf7c241bbaca9f887cfb81caca687e3f30d01922e05c72b435c6333b334fa4f708193b8c85de9777f9912fcd8a55b1d7a6cd1aef00b913f0d0f1439e9ed6f
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/5d487035d147bc48655e5538f08afbdf
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/f26d96241b9b18609c1d4148e30048d73faed24fe3f623a5d2bc6aaa59a644cc97201acfbde2eed4daecc79602e6d13256e112c8b821b6d865d071db957268a2
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/fdcf70b4514c3d63498a3fa46a2525c2
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/d481c807405302eabc612075e22acc9f7d1cbdbb17de23b6a129dfba60c265eacf20cc2d48d5b4087979b1184a783bcd0bf6ed326060e5ef05119556f21a5a95
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/a8a04c92d74dcd22f980956d2b7ccb71
-Clang.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/af3ad3efc2941b98ca4ec1340e24beb1c1f1c5d2248da3000af3f2e7184df013b55127040cfd03a63acd461acdb4f1afcc6b11f1ad11502aa86f737629c185a2
-Clang.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/15d02f2f91fcdd52d641991d58b15c9b
-Clang.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/e1903be2164fb3002a93829a295d2a413c14faa2a0fad2297763a6cbd63ec0bcc37689cbec4c0f0bd0f4eb4cddc716f78d57c95f7ed29145ffed3b7c50a98d04
-Clang.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/cfaab26c1c8409362a267484c2ccfbc1
-Clang.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/2f9bb137df4666f9c2947a74a4a06489d477b5093c3b0acae11d6c1213c467e258aaa360183f8b18ca28778773a5170f5dd19ea3622294f0d715a5909c6d06ed
-Clang.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/6bb27685277eae5289f782657925c33a
-Clang.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/35bd7b862b2a2aa372be7f0bb01aa65dc58f71d2218833ca39f832a25c3162b3282c9806f3cdc4a9a2a7bc11167a6daec30b688b979f1a38f49fa412e4628648
-Clang.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/f44f9fdebc843c5947d1777e53c4189f
-Clang.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/09d1f9a29bdd04e598622cca11e0a82ef6a68f0ac9d4e6fada548482fff6219cef5714bdd3d02d1c0944de14ef14991ee0eab9b4f54c4efacae330a9e4ac47dc
-Clang.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/0d7c790218fe40d1f87850197a5d08df
-Clang.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/fac636c5ede5baf7d5c706f348b8992d4bf0042cb34decb83964385d9877b7555db998fc79cd7f032d80b3572275f13fbc36ccf5a76e573221266e1ee339ec76
-Clang.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/5d4f5b78645f75b93e76a75efdad721f
-Clang.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/a013697f1da103a1202a1728ebfc61ec0d08e705e0caa253cd14430abfa3d47a7b43930d3d9d70d995dbb1e5f78eebd02ef8cbfd9b028b447a3b7953d437a60d
-Clang.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/8d9b46562cefc0ce9b7dfd6022cb914c
-Clang.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/95d8cd716bfbff69d336987a3ff0f65e28f48544679cf6bd165319cd5382f0eb9d5be119917a5b309e7544e43ac7c52f1370d159e67f18ff2eda06cf7bad31f5
-Clang.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/86bf7e43fa750d620495eb73c938273c
-Clang.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/1a014fa2ec455fee7be9413fa1db901360e5728bcfffb7bb76fd3b30b00120883c91f4ebfcfe048e5f372bdcc18a2a45744ddb1e8c7e303d5952af49e386caff
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/c125dafc105894bb0bb821bb7b28ce3a
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/6da75d2fdc18ba95dd3db9181401a4ac0b7f8e465ad872f95f2e9db49701cc56da7c13f6ca69b01e15832f9bf23cd698ca5dcb28dcf775edef6bd5728ca669ca
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/4b9d2090af04573a35c0d80833f9212d
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/7e9231e286f15b4d90318a732d1fae3130a1d6714f6cf868f5d3472068b719818e4d2a63dfbb2056e1f3e7f2a25250c4de1f0629e459b36f7d8e1e42286470c0
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/7762d01fc07748336997bee900003653
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/c7334dfaa5317b89a58beb0b397a2784036c98fa3434f67efcbfd1ee9375a195ebfedbfcc2f7ddde00a518e2a175cd821e11b887a913499c10d60940c7cdbe43
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/4d4388d2e621d71e390579d0684776d6
-Clang.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/b5dee74f399ed7422fc1dbb3321b8c216fe434ca57440c4ee51293b2478ef007df9f8d1031e714496014309ac8eabee4c7c884272181c0713253f43e1bcab3e6
-Clang.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/927779c5fe29a5beb2d4bf0668a0fae2
-Clang.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/3c16687184eaf180b5a6861a4b96ddf0d099769bbfb21265ed434575c537b10a30803924f05aa53ec0684cce8b8ae31f3082124130d4a4ae31b717bfc01e7442
-Clang.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/md5/5c08fb48aa15c5ee9667a3e177f19851
-Clang.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/sha512/aa773bb698145fb4040c34d787d959c0db71c90da5a5e5bf6799d287fd7925049169fa1a681742e12f81a32156b1d958c2f1d92295888cf50ccd4b84fd798625
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/302b59a86820fa43b0f62c8788f4129f
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/543fad08f4d22a8ee5a25c29b81f8e6df729e19bd31d94cbb48d5cd9bf451fffdad692209d9a0a98583bd1cb22d3a783ecc140c10c65da58ebb1b727311aeea1
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/5e904bd57f12999826ef39bc37382c76
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/adb8061bc2debde834722b7965d4dc8ee5ea5a5fd5459eb84f872e13cb40a38d2e923f5dbe06cff5138e6cf065d9ce91e52ac395a586f0ac9c6f4f2fe1e4f0d1
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/806b042515bb8294bacf37dd804dd303
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/dabb1c73f477ca97ae2d84846ed937f084ceb9abcdd614a0fdbcab7b9d863ed8544ba25fc25f99df40254c964eb4abf89ed61bf4a61f680607e8d096f71a837f
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/1e92606453daf98bbb5879aef9a0b97f
-Clang.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/f262e08e2b36dace3f7cae645709627368a221bc7d3a081e501f5a95797ef36e9c63071f17d722ee546e0993502c171049f285e22f665e3998e1a408e3adcdf0
-Clang.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/d5519cf744be4d311c5a6ce97cd26d6e
-Clang.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/5ef23e8b5c0a828974dfa578eae68255e01c461f7d4bf10ec43f0bb4d2fe3b88c649dde8fe534550c602ceb835f7e736aa7d0b642c7ed21aa725c06c3bd4890f
-Clang.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/22831802bfc779bac31c3b5fd5b613e5
-Clang.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/94be9e0c6c077409f8eadbdfd8ddc83901bf36f095563e630ba02a86234f30d67d4bff6df2cc33e1c52e494f231f33538ce1f8a25a2d0e187596c638121ed948
-Clang.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/71a77a339451b9d49858ecbb11119efd
-Clang.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/49c3aa8c8580969750ea6d61fd69e98d1daf47b9578cf3372febd2df79542e22940a24d23ce16dee20e4bbc4becb9340f820d3d45f879fbc5209f3f9699ffe2f
-Clang.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/6a6d68aaf9ba085c02ca1218cdce0246
-Clang.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/fcfbc12cb248021b4f8b9bcd7a21cf695b0bbb3983564a9602e6a23f83ef1b5d884927ae7b46ab5e3752a18d5346fc0b52a160ea18af1c9ee28870b470b4591f
-Clang.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/md5/e451f3326c665c8dbdb41ffa2b6362e7
-Clang.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/sha512/d2b2f812dcd0d9d7602f37bbb629a8573be8e1d97e40efc51fe4676d6fbe69c21aa1943b6fc7172e788d3b3d2fd9d02fc3279838fad70434caf3a9e427006336
-Clang.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/md5/995abd90c834cadde6f272e097ae51e0
-Clang.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/sha512/f8f2d4ef5e2fac7d5e3b06ba76f7f54791820e15f0ab1bbd182a5e70709fc29085c73f5709cb45267671a849dd965e01683c6ced91281ef9d64f4750cf5d6151
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/26143a5824a6564f69510f227acb6b1c
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/1ce7c9cc3c9d04934f06a32d67f5c23f68cb26b917cf81c8e9844ae20eab4709110a4142d21b62b205c714363df463e63c2563011f432e2e0206731841798ea0
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/06dbf7651fe7d8b021fc1ab6beb125c3
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/daddc731c54c13c0b2d665bb4360a400fec3246f6d756d5401a241cf6c9dcd2fb1df2f55c3559551ef9536d40067e9ae31753947756ef6210696b87856f831c2
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/722fa85d203c5da1b4e28a1510bfa27a
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/cf8d09192ad248c6603de813b22bcb61e72994d0d39cfc4260d6f6e1ebe69386313f924c5e3de3021ce2041bc41d8a022623bae5c8979fcf81649c85ee25c9f1
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/85b8d93bdc92b4014d45f5dff6ba626e
-Clang.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/b1d66f9bcbaa3dc571fa8a1ca79f39f79ce4c7941bdd1a1fb7df2aae2c90960b9ffd7899237da1379e1898c18e2ffcc63eeefd20ba64550aca82167474981494
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/ce3e582bcf2f92fdaf778339e8c51910
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/4f977e8f0912f52b9b4054089a53a05f60bf7ae352c39b2541e68fecf3c21969d6d1b85e40d71d61040b65f7c60a2c33c8d259734bc1d2ddf77392fc425025cb
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/1eda08774c2f9975de32bdce4ffc72bd
+Clang.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/c76ec1de9a25f4f8bd309336830cc07e1113b941ced12cb46976b24aebd4ab3d261c943dbc9cdfb34a01f27073af6f598dded31a4e03c62f229cd2e7d5982af6
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/2817b0eeb83eff4e1f580729e02564ab
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/88242559299836c7a7b7d3a216353fc6880a587a839793ed71d6d053318d6e2071ff218587a082f2b5dd9fb2b0952b4c60e62030d707435607303708bb1e6d81
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/d3f92998b7cc35a507cb1071baae8b02
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/be22296623f604927e2e815a1cc149addda6d567270a50b2cdf77fe5b09f74313210a1ca7b1b3194592da23490ba1ccfdab9f520ce7219989e646f12208e418a
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/716300acfdee4415f1afa3b5571b102b
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/b97efb3c461ea7d2736a3a8bb6b6b5c99f02df9a095f11291319c629d44f1fb934b124d38af6be3e5cc7103c6f85793d7f185c607383461de5d0c846560a1d1b
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/034f44b2fc61791234d9580402002fb2
+Clang.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/0b4ff55afcec0b1e8fbd09fab57de8b44d5ded360d3b53132c7a7df8d3a3b83a495bf6e0c706784e678c6de46be3a72e8bfe562c7f8dfad90b82880849625e35
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/54211070d63a2afac6350d06442cb145
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/a58f8afe9a20f202cf3956f758dc13a10be240d78877a02cd006d7e972751ed65623eef7e92a7256d9ed9157d6e277302f93b58f583d86d386ed4945f3c7d875
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/7084567b3637fe64088fdce357a255de
+Clang.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/77ae83e159a814a7117cc859a0b2aa7a5d41f983d45b7eb1ce2fd2e93f8733ee067ac8c9fad9d5af90f852b8802043ef39c29b44430b2594892e57b61ccb680b
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/9e294d16a6e1c2c76c03f32cbbbfbe23
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/b8f83542b51f5cf953f6baed185550394744a8466307ee08525bf18a651fcecd7daafb98e75a0866b0e9a95a524e8940be7ae1878ba80d856182dcb7f7d2254e
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/70a41c2ffd55d2d87a7b8728287eb9fd
+Clang.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/44bb3dea7227ee991b2666c43a88613d5b5d382eb560b5ad1f1184d38680c85a2ef961bac6ad71c2b920702c1ec6e09296198e7ff5e2929f4ba7839e55896e3f
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/95ee1406f8575898eb52e2c86ae18992
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/4da66e4d397491836b3e539258844346fe50bff41e6c0628cbb5c0eac76147bd91d1720cec1523452efdb063adf6ef8792dc278244e1f8e194ef60a180442c56
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/6c4e4e892b54ce81d73a8598728083e3
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/53d08fd8b6782867cfa6ce001b14a2fde38bc9ffc85c7e148aebf59dd9c1c535b54eaea816c39fcff42abc456c1047ed13d688917302bcc5a281abe368bd29bb
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5acc5853111bcd529eeb06ea31b329e5
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/b1794f7cdfba838a7e43de8f66700ae44fd16d8f06300e8ab955044ae9bc96110c5ea72691841cd3787cdc93dfb91c6b257702c20390689a8d1b45a994db2fd8
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/c4de50252e557fb126360001ddae6a97
+Clang.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/9343a7272c76d5341bb49273ff8d43bed09ad99b2879ec51cfb8946174181b286af82d85e2d3a13a375c7e7859e51e4a4f06031a6a3fe7e540700cfc6a795741
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/af301478b20e56cb7fa1160cda2573a2
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/8822c58df101c239221fead6fb523e677da04a065b42849a2e6ffff03dfd81e07f162a9bbdd29490ad9c0e0a33d362eec46608b9e6e42dfb4889da1c22191c91
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/901d2808599d5ac5ac7b5ca4bc39833d
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/820756cad00b1fe927801a253bd3077709c2b067ae79f9e1812f3cc9e85a0b7ac2ce1534031b7c6f7bda3364b7173c1c508e7c7d316920fb9bb901c16c1b18c7
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/d1f368604084e907c382aaf00efe452c
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/523b25f6b79e222eb65b5f4cd8f23b0d2c8b25b29af0df88efe45546ea57c7dabd88baef454fa0b76342d8d364739107271f25d3504380fdec5c9d225fcc2521
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/e57c116b2ad1cf32307eb4e600ac80be
+Clang.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/63366b983c7aac9fe1246b25432b2200c8316f569f6930eb12de3c867f448ffccb8756d418f92eae7751d4c9ce6c42cee38237e429b81530819684fd5150c93a
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/645929ce42276db10ab79184a60cd6e3
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/65555ed26d9bd670b8363e5dad949822c2bf0e141a5418e1dc30c3f8a4733dd050620e40be2e7552c2551ecb30d4ef3e8f74cb240f1d441a9720a25f5a3bcaa7
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/8424c6c6318dfa7bebeac33917b29453
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/6cf90c253f6b22358c2389a2347af2febd010117b22de0cc91ad713b8c8224627398004567c96b673650212eb5bd40bb97b9a637d46ddfeb3c72388d83445017
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/ea8151dc1dc32befe579c7f9d7f13898
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/ed518423e9ec35afd7983471cf9ff1e971b840f637f34e0f62a1f6c7379ea59d4dafbeb9a311d39761733ecc98c0318ce3d8883298f8998e9c741441c7c9616b
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/70ed39b13bcb0435fee63bc30ae25a39
+Clang.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/b2afa383346875514c62129c2991b3604c4fd3d507ecf4fc4244dec81d08b30218f5aa03dc4977185c2c9fb2d08848ddd373e448883ab472e5221ae5bf285c99
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/e6798835128f663f0c837aed4463e34b
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c99856e16bd42ff967479e2c89690ea41268f1d1f868e2628482eafdfa53a0d69ed7c21ecc68ff0859eef07d9fe02f4844fad5f13df26cee6cea3a4254446096
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/92c1bd54b0474244e35c51952966a55b
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/2d7c3b60ba8b11cf903bc5ea720193852027cbe61ea0c8d6fac70be8f97691da3d36663aac6e61b68185dd83b42d09ad61dea973d9390271210d690295e4902c
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/c495d594f8ce1f701d1bab54d0b60521
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/0261bf45403daccf236723383341dc791e9cb3b291bde97812378d85aed785f083d5deea3bf806480a04ef1b972b00dccfd0537e43532a066c64733b817c3d77
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/41541de24d625271bdd5fad867b8eb0c
+Clang.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/595226ad7ef75ab8ae03adb456b4ee9e884e9554c720b6c4ecbc38c75d446ddba7898be94630673074f09f40c6dc3e18fea9cee5a91b8b0e4727d20a180f670c
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/8bd8ca0436611e78882939067f6277f7
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/27c7b06e93fb0fb516b1b240e0df6c95e8bad6aea04d637ba065c6fafd087bfa94d9136afd39273c8d82d9c467395dcbd7b16f6a4b829acb0c0d4a5677676a5b
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/424bfbd7b69ddf7b1199afaacde3e028
+Clang.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/9c48d949309aef6ee39371ff39a4f12c31bf3f25ddd288b317b2a17a803db73850cba2886598a1d10c4c154d511a4b79958d1acc012e92491a63f3925c522873
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6b0b3e045ad64ecdc9848898f30d5f34
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6c0f4bdabbbc94fc9e1fedc138b0bce99d383e380ae7222fb70f5935f17701d549f6486956c8a21731061e4bf60bbc52794f6ce6858b4d2adb89bf80f88795c0
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/3b7a461ebf957756aeb2a2455b0a298c
+Clang.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/74641a3636dd58c69415b19f0cb1de444215e22cfa9f0268fd549b5c53b206811d8beecdeb9692285613468d9a0569e836d225fb8361218438346914f6282839
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/5e7b9ad5fc3af3bfdf262687cd248dfa
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/c54835fdf8e3e442b7c774d445c2f13c5dd8b3224f4ae165e72cc893ee5453d0112a9ca6d543b17f2c02a89471e2cff7cf022dc4c8188a5df25d101dd0f954b9
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/3204bd8074d42920a6707cc8624c0dfe
+Clang.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/74b26c4556ca18645cc15647d8abdbd46fb94c75169934af885e5773a880c066b2ff221402fdb4a53417b2c97ce589783f7fae6a8d56ee89cc1f70577b02b2a1
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/7922c04964e0c1a5b44e95480290930d
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/4f0d675c1b85dc3e5007a62a7cfea412ca432d1276a259db3ed5a1bf0f33d6c555f16010de717a62e0e065e7c1dbaa66c281815eb9629d2b6c720b152820e582
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/e023eba0ea0a327f53013d5e4d50d0cb
+Clang.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/9fbdebce9c7375a20d1cd10e39a0c26b131af686cb5771034a6afc6cab08855e0cada2add616c01394424383333950d0dde9c55a9477fa139cf0ca3fc438b229
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/a6c7d64ede931fb19e066a1c191e2f6d
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/1a085a4ea1efb910f2b529f3c0e51be4a5e31debbefd00ceefeddc352b36bea6d0de5a06ea7d509098d16416b536ffed3da8485feefad7a2f11b1bc148a0c8c2
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/692af94ca3e5c3d229cbb459e266aadf
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/b27f05cfb0ada89cefc5a6f6527583b6b43d03525954d5b1ad1c807712efdb8750ea558a230b587a0c0d9e77c54d9f8978cc2f3884653808c7409eab1b32a055
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/3b59b6aa4b18b5dbbc632811f2ffa270
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/f8c4b593f969c723ff1931c4875ed52497d83d74b94121890e10c9fcca5f6bddc5067555dee9949e61e426586ae3e568375fc44f318a07b70571ee34fdf7032c
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bc4be32ad57b13c3dabc80684a176ba7
+Clang.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/19a8346547b6c6adc2a9156e4b913b20137593752efa3648ad532b08de67cf015bba1eb023204755f48904c3381a3665c6c54fc8233c50e887a22ceebc652303
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/05f37d069c7d59ec245d961d0928cb37
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/3b0956fe770fd9230319bfcaefab4922f9aee3df3e8516edf81cb7d322132ee9ab899af4464c75b1042aa99e3bcb07ede6de5646bba2a57995fc2eb32d4d0861
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/0304434211ff4101a148fcc0c96455d4
+Clang.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/a033dc589fc95e63547b7ca82964116bec33ad6e78ac131934d4bb16988756d36c24d74761ca93b0e47dada1f3d2a63071cb3721ddb9af457cbeb164fe5f0f54
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/4e5d1064d90f24d57d63f08b61baaab5
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/cbfbe8b6f2be80e59b69d25d6af901ccb4807b12180208b69afa7223dd7d5249255265bc319c9402a1b0d1f0995940e3e72d7ecf1009f60d83021f8d35626a46
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/22fead15b4c45398ca869821d04ce015
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/2ee7a7d3f293f7b63c89bbe3b541722c502a840883804ffe272848f4ac99b7a8ed350ebe92ec434dfdf03d1f4a5531c1367859f4a4603c98325abe5a0ad71177
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/46dd01b10377cc3d45c6a42cac0a07e5
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/957677ce4251938d0c5e066448762b38a21bcce5ed424072ccd58085167d61b7e45a88fe32375f6bbd43dfb579b65a9afc09a886a650fc634a8fb9c81f27c9e3
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bd9a61ea186a39162201341f0739fe84
+Clang.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/7a06d2a9ef20e88daa00d627d482ebbb6bf7223219d8b2a24aa60ac9eda24649d206b093d5bdb88b65c1e2b0d1ba0ad7dd927697e2bbac65bc9b42f9d14ad0d9
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/60c98c6cc7d4446fb52b7585bc8709f3
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/4d55464b4499a45f774e1000a8b015326d114103a3d348fb263367e5506ca6659444ea6ee2767712903757e83939cd446aff6fe2351438b644f0057053422b58
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/90a512d1881c4af1f1abfd5e90e37356
+Clang.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/62d6d855aebd49f132d6470c7b0d5a0b965c6489b025046c1ea73fc53336030d6c5b4c867523a9206821f7fcf62fdb37ef0b7ff4b5eb04d07f40b65edd2c8e0f
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/c9eb9acb605d774db9636b82bf2e5f41
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/96e1440b3b0378edf8907d4cf779b1c53d63f6d00fa798efe1b6aaa289135aba8fd00a8d6f55d9678136e9e07d0c189293aec64f46e66788b938e1f8e1fc2199
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/5837070450c81d44395468d8e3671dc7
+Clang.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/0e8b674c0360f9586f03c7f5d0ffd5bc73dcde1e88eddf7d6360c1461adb8efffb104d8f454116a6a6cdc909973d0876745590b21009a9de56e12ce6e1c2e8fc
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/5c198d35df5cf6435f4f5ac91a78be01
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/9ba0a532f499933320145834aec2b57a70410bf67af649ed675f00aebfd59de7c80e6f5d19e7ad57029a573090e63c5eba4b42b498a374810b48c8668b50dcaa
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/8ac88c856d946e29d1121426de44e6bc
+Clang.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/94af63ad3fb17d9c07f5256e2d474effc0e3d5ef66f4a9f3ffeb9bdd8f1577c35e4d0aceb8b4746ab857d8f164141790ed494b7f687e644e040d2f3820f9e1fe
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/b4be546ff44019cf46d3250dd9a4321f
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/7ce5e4d68e18021392355359f59931219eeec3be4edd01f7a18b7bee499b589414bcea73820ee38dbc3b5ab12d912a93374b4a616b10ba491f5d41b6b33f3d9e
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/4616c348320d8704215d58c7268de6d7
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/b4c21147ed21d41321e04b092d47f99338c6ac7d50b8328ceb8ae26d6382955cbcd655dddd39f0de3d3c36a5fda7084a33272aad9f6cd9585c87fee68be73a68
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/bf9cf2efb938b68ac7e1560c464f9051
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/ca29438393d393912571a96ce59bdaadcacbb329342c42a0de0e8d8ab52f69d4e6966822c0743d99b1a277c8715c1f72ddd490b781b45bd691df2c137ed42a1d
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/94138893eaaa99f37354317bc13cf7e0
+Clang.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/1b03d8d4e407372875667f25f74abdaac9be0b81c6229dc1c4c1714589efde6b1f8c76302a2545b103ee4f9812fa78f9e06e5d5bb5bc3903ce579328899faa2f
diff --git a/deps/checksums/libuv b/deps/checksums/libuv
index 41a9a5bdf9722..6887c3fe62f41 100644
--- a/deps/checksums/libuv
+++ b/deps/checksums/libuv
@@ -1,34 +1,34 @@
-LibUV.v2.0.1+16.aarch64-apple-darwin.tar.gz/md5/132266a501144f34eb9b8d5199db43c0
-LibUV.v2.0.1+16.aarch64-apple-darwin.tar.gz/sha512/e466ba8a2fe916f0e2dccb1d1075a6a20fcc5d5068d2375c940353a63522332fa8f665461adbb47ad4d30dabaea011b8e72a603601da29a071d98c7d7d130f46
-LibUV.v2.0.1+16.aarch64-linux-gnu.tar.gz/md5/1ae3018d9ab8bb293dbf6277c2c209cc
-LibUV.v2.0.1+16.aarch64-linux-gnu.tar.gz/sha512/6e56876cdf0fdad1aade6435edf980b286438ee9fa695fa4e262b47f7ada6ff69535c59d216daee3eb1d061a90c2c16fd70d21438776c54addda93cf275ef1be
-LibUV.v2.0.1+16.aarch64-linux-musl.tar.gz/md5/08243e727c7e957f5972a200b5d89113
-LibUV.v2.0.1+16.aarch64-linux-musl.tar.gz/sha512/4a684f248704b16b882d66ed7af60e2217a0b98f476bfdd1cb545d3e2adb17f6a410bf09e270c1e2623e550b36639c9282a562ab415850dfea98736ec03fd000
-LibUV.v2.0.1+16.armv6l-linux-gnueabihf.tar.gz/md5/c4dfccf5a899782715cbb0ca0197938c
-LibUV.v2.0.1+16.armv6l-linux-gnueabihf.tar.gz/sha512/ecdcd655865a532187e4e98cb21ca68e62303813cad585de83382aa226d965213f24fe7a684e1189fad11b0e5f2f4b318c122f557a6117f61bb2948b51e16a76
-LibUV.v2.0.1+16.armv6l-linux-musleabihf.tar.gz/md5/5382dae963f3003aefdb119377a45e82
-LibUV.v2.0.1+16.armv6l-linux-musleabihf.tar.gz/sha512/f901c2965e8f9ca52900180c32cdb70d8adc13f12f076c1b109d57b749cac1ecaac3c72e22531e6fcb79c8f2c7cf952ff563779d3764b015b73db079f2b171cb
-LibUV.v2.0.1+16.armv7l-linux-gnueabihf.tar.gz/md5/9c4cd82249c03ebeac670e2c7c8c1078
-LibUV.v2.0.1+16.armv7l-linux-gnueabihf.tar.gz/sha512/ee4b7f866e3f63df303d00d48d36680c490570979bb7174c12cfcf9efaf48ea7ae90aa05b41da8ab686de93c910c5a761f31da22845ad48fd980e9c16437cbfb
-LibUV.v2.0.1+16.armv7l-linux-musleabihf.tar.gz/md5/5255d7e320ef37eb63d0e85c4b86d20d
-LibUV.v2.0.1+16.armv7l-linux-musleabihf.tar.gz/sha512/5bcd3d22b1e2398879e654bb550fd093891775c64cb48bd179c4f9ff8dcbff23eda91a66ea14852ef5945d5c114732957075e3b3fded4cbd3cca559fead842db
-LibUV.v2.0.1+16.i686-linux-gnu.tar.gz/md5/7f0fc52beb13dad773c6ab54deee7a62
-LibUV.v2.0.1+16.i686-linux-gnu.tar.gz/sha512/cb1736eab4fa1be89018b3c77c3551a99d0fa761ad2f1947587c215d87d963d43198ce87574b6eb9d1fb8a93abf1ae89e74fb8a3f3fb9c4fd08a49e04b4335f4
-LibUV.v2.0.1+16.i686-linux-musl.tar.gz/md5/ed22ccd7eaa09ed9c71afc0c6affa423
-LibUV.v2.0.1+16.i686-linux-musl.tar.gz/sha512/7f3ff061c3d7d0c3c0c0be3e4052aeed39f35e1ba0b92f3ee3d9f266f26d064acc153c08054a22d090167f00fef3c27ec54e836de35f348e4849baab301f7fa4
-LibUV.v2.0.1+16.i686-w64-mingw32.tar.gz/md5/7f1fe93df0b741ca30c4fb64ff9ac9bd
-LibUV.v2.0.1+16.i686-w64-mingw32.tar.gz/sha512/9d71722c538d8232d8510fa2a43e7a52271b078401dfa838de9eedcfc34a2483aa3b1c221b17c41353b54554fe76d86b4973c5261b288228a91f0cc92820ad93
-LibUV.v2.0.1+16.powerpc64le-linux-gnu.tar.gz/md5/b796de6c75f18f318823e3e1cdd316c8
-LibUV.v2.0.1+16.powerpc64le-linux-gnu.tar.gz/sha512/f8dbb98cb49edfa06a0b48fbe1e658ca5a9bca13fe33d21872a012deaa1052a495faf74f90c0dfa48378b9f4f51f1045e01e563aec427d8c89d50e4eef0e4938
-LibUV.v2.0.1+16.x86_64-apple-darwin.tar.gz/md5/f2d55b315fa1f77b632a461530bb6b3b
-LibUV.v2.0.1+16.x86_64-apple-darwin.tar.gz/sha512/eb40a193c3bca5e822a417879e854877b353a2a04b03a721ef4125360f1189a3685d2751e2f975360a2ad4c37e6043485a54b5349b3da423b8aae73d4a095d04
-LibUV.v2.0.1+16.x86_64-linux-gnu.tar.gz/md5/a573ded4f78f8677ef73594be9629638
-LibUV.v2.0.1+16.x86_64-linux-gnu.tar.gz/sha512/c5809635be3ab5dc53c37a028e58695d89ea91eee850af22a0e8db10ea021640f1e618a553848332ee6df66eecd08d34605e335aad46ece82365a3525b69c42f
-LibUV.v2.0.1+16.x86_64-linux-musl.tar.gz/md5/5bdad561b5db7d19f198ef090ae3ec84
-LibUV.v2.0.1+16.x86_64-linux-musl.tar.gz/sha512/6662c8226f22f79f8c40857a5a531841f013031dd2e9536568498bfd536f133976ff71d0cc5f56f1e0c0b7f2403a35c2ccef9117d9e0d7819771bd492194f20d
-LibUV.v2.0.1+16.x86_64-unknown-freebsd.tar.gz/md5/f4ad9e445e4b14e2b59b2b77c9ed72ad
-LibUV.v2.0.1+16.x86_64-unknown-freebsd.tar.gz/sha512/a78deac6d8321f274a229961620da4d069ff2accf7d1ed9edfb01c21ad47eb33d364ba2f310ff4a93b2732dcd16f6d481843dbcb273770d731fd528f9c7a9ddc
-LibUV.v2.0.1+16.x86_64-w64-mingw32.tar.gz/md5/72caa067cf24e304955405dcb4de195a
-LibUV.v2.0.1+16.x86_64-w64-mingw32.tar.gz/sha512/de80ca98d199d3c5626ebc771325806ce3aae5927220201c2351207c10ff67791d2865f76e41519df88f0be3da534342965e7ba0d055d807c4b2b6c78bd2427d
-libuv-ca3a5a431a1c37859b6508e6b2a288092337029a.tar.gz/md5/d1fbca8bcc5819037b8b81ae4f61c357
-libuv-ca3a5a431a1c37859b6508e6b2a288092337029a.tar.gz/sha512/e735861923c0fc597b53eb2efb56b26acec29e3fcae7e76d349fc08f8b9d340df9ac60a1cd245e46a434aa357ed8e377734c1c97bf08bd044c9ba0c02b082a6a
+LibUV.v2.0.1+18.aarch64-apple-darwin.tar.gz/md5/f176c76e5e2096dea8443302cf9550b8
+LibUV.v2.0.1+18.aarch64-apple-darwin.tar.gz/sha512/4301b13953a08a758b86e30af3261fd9a291ce3829b4d98e71e2a2c040e322e284c5a6eb4bc7189cc352f4b1cf7041e2cfd3380d511d88c151df3101ad74594e
+LibUV.v2.0.1+18.aarch64-linux-gnu.tar.gz/md5/c81515783363702a1bd4b65fd6d7f36b
+LibUV.v2.0.1+18.aarch64-linux-gnu.tar.gz/sha512/011429365337f5a45e56ca7a42709866bb994c747a1170d870f5f3ddfff2d36138866ee9278ac01172bc71bde8dee404bcb9cae9c7b44222bf1cc883659df269
+LibUV.v2.0.1+18.aarch64-linux-musl.tar.gz/md5/e74d5ea4912dd326b2705638faa7b805
+LibUV.v2.0.1+18.aarch64-linux-musl.tar.gz/sha512/a26a9f2c9051816230324071c502321f7af3885d581a400615858a93a4cd457226048d15b0e7f6a73d12659763c705b5ab519e9f5b35c6d886b9fd5babbfe352
+LibUV.v2.0.1+18.armv6l-linux-gnueabihf.tar.gz/md5/6df38bcf5d0a61dee63d16b73d0c9a24
+LibUV.v2.0.1+18.armv6l-linux-gnueabihf.tar.gz/sha512/d5354a6532061de0a58965ce0e427bde52f9ae0ee39a98e1a33de4c414fddcba9ba139ddf91be7321a4ccc97bbf7a8a8357ff10cf60f83c0a6bff7d839d6d7a8
+LibUV.v2.0.1+18.armv6l-linux-musleabihf.tar.gz/md5/6f02a24cfbfae3032fadceaea1faed39
+LibUV.v2.0.1+18.armv6l-linux-musleabihf.tar.gz/sha512/7fd107eb9a5ea84b488ea02e4fbedc9fe13bb11be859986a47af38f40ad775dd9f738c790878a3503437bcac1eb26ad9fe26f4aa0d3cb45c980b4c5abc9aec99
+LibUV.v2.0.1+18.armv7l-linux-gnueabihf.tar.gz/md5/96b09dec72f7e9b7409fa2920e67c866
+LibUV.v2.0.1+18.armv7l-linux-gnueabihf.tar.gz/sha512/6a0f79fc15c944fabba5c65180b665bc9769c6ff25863e330049f48b3a4394b448492f5a9a76bb7f8dbd3ce44dfc6f9ccdc2c71c42e1c749e88070fe99b1db69
+LibUV.v2.0.1+18.armv7l-linux-musleabihf.tar.gz/md5/f44e4b2521a813181f943895bdb0dd3c
+LibUV.v2.0.1+18.armv7l-linux-musleabihf.tar.gz/sha512/cda1413dca817f772e8b343db0c6de0ef6b8f269e9a6a2ef3403c2582aeab554f46281bbb1eb4659c259198ef47fe26aab648a281e66f80aaf2f2cda0a23ac05
+LibUV.v2.0.1+18.i686-linux-gnu.tar.gz/md5/1f231d89cf9c04515d2d107a5d786cc8
+LibUV.v2.0.1+18.i686-linux-gnu.tar.gz/sha512/089cb8a372cdee0cbc0e78fc201611bb9bafd99af9a78e09d6097a6b70e7c4aa001ebd86f944b0a885c072093c529bf86bcaa32bde4fc1934407a858c1a5a764
+LibUV.v2.0.1+18.i686-linux-musl.tar.gz/md5/01cfc2a9e2536dbd330267917abb19ce
+LibUV.v2.0.1+18.i686-linux-musl.tar.gz/sha512/72f3588cb464a60e61f8998242aaa2abdf93df920a2feba5e1d66ef0f2498488df0ec415cbb499d7f75c47bdfc7e3a2fdda6a94383492e0ad13e464eb1314ff8
+LibUV.v2.0.1+18.i686-w64-mingw32.tar.gz/md5/8c6599aab9ed4c46e52f03683aac664e
+LibUV.v2.0.1+18.i686-w64-mingw32.tar.gz/sha512/13f0565f7244a8bcf1ab43fac91a856dc86d214877033a3cefee8c2179c1a275dfd7dda32e9017763acac2ba42ab6799934a58f5feaa38fb6cf2253dd713f57a
+LibUV.v2.0.1+18.powerpc64le-linux-gnu.tar.gz/md5/af0e43d9d0aa91dd82b63220d96991ef
+LibUV.v2.0.1+18.powerpc64le-linux-gnu.tar.gz/sha512/9fabe3089e4fc60e910770c32d36300ce8ef36c28e8cc9c72fbecba6eb80285ee8174e84e4452fb4ce90ee7c7f94e99b03fce47d8c579bd614bfffd553f93666
+LibUV.v2.0.1+18.x86_64-apple-darwin.tar.gz/md5/871040e874eedae54553d8f1c91b9133
+LibUV.v2.0.1+18.x86_64-apple-darwin.tar.gz/sha512/d5eee08b65e4bb8b444c61ac277bec9ef944b9279dd7f0732b3cd91d47788c77938e5db71e019e01bbe7785a75df95faf14368764f700c6b7a6b9e4d96d6b4c2
+LibUV.v2.0.1+18.x86_64-linux-gnu.tar.gz/md5/d2d186952c6d017fe33f6a6bea63a3ea
+LibUV.v2.0.1+18.x86_64-linux-gnu.tar.gz/sha512/15501534bf5721e6bb668aabe6dc6375349f7a284e28df0609d00982e7e456908bd6868722391afa7f44a5c82faedc8cf544f69a0e4fb9fb0d529b3ae3d44d78
+LibUV.v2.0.1+18.x86_64-linux-musl.tar.gz/md5/271d4d40a1ae53ed5b2376e5936cfcf9
+LibUV.v2.0.1+18.x86_64-linux-musl.tar.gz/sha512/1956f059ed01f66b72349d6561b04e6a89b7257c0f838d7fbdd2cee79bd126bb46b93bf944a042b5a6a235762a7a0cdd117207342dd55a0c58653a70b4a38d48
+LibUV.v2.0.1+18.x86_64-unknown-freebsd.tar.gz/md5/62fe8523948914fbe7e28bf0b8d73594
+LibUV.v2.0.1+18.x86_64-unknown-freebsd.tar.gz/sha512/e6486888028c96975f74bc9313cba9706f6bf2be085aa776c44cbb2886753b2eee62469a0be92eb0542df1d0f51db3b34c7ba5e46842e16c6ff1d20e11b75322
+LibUV.v2.0.1+18.x86_64-w64-mingw32.tar.gz/md5/ae103f24b6e1830cdbe02143826fe551
+LibUV.v2.0.1+18.x86_64-w64-mingw32.tar.gz/sha512/f814085c135815947f342ff24fa0e1015e283ccece84a5b8dd5ccec0f5928a129e5fd79100a33b131376ad696f70b5acadcc5a02a7e6544635ecf7e18003ba1c
+libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/md5/c1a7d3c74ef3999052f3bfe426264353
+libuv-af4172ec713ee986ba1a989b9e33993a07c60c9e.tar.gz/sha512/a3f16863b711ddeeb5ab8d135d7df7a4be19cc2b9821fc78c8cd3ba421231d39b7d8bd9965321455094fda01584842a58f60612d93082b4fe32210b8aa44d999
diff --git a/deps/checksums/lld b/deps/checksums/lld
index 9238ed622c27f..cdcae063f68ff 100644
--- a/deps/checksums/lld
+++ b/deps/checksums/lld
@@ -1,108 +1,108 @@
-LLD.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/3144fe910aa5fa308a2a2ca86820541f
-LLD.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/6b60bac8ac870c6e0f2f615ee92599c863e388bb9a654ce7dc6b037e6f7ba77b4401f88471dcdb2c8418775a833a10b010bd932a61c4264b032f5bf42642559f
-LLD.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/md5/f65548e0c2c455550635d2821822e97f
-LLD.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/sha512/52862c78a5bd6a33848ce33c79eabad854a5cb86487ff755160f3a7c89ceafcc24773495ced5d7d25e952b7a7147969a890de6806845996a0dcb3ecd8c1ce1cf
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/e4b46d1b3397fbc876db8f0a15745f3c
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/831ae6748e0c18e4be6a732dee56bfc3b84383e7c607828f72ba798db0bc1f61e9379edb904cfb992455ab5ecc6d4ea7dae4bd8eba481a857afe6439fdb333ac
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/16d60350522a797fac1fc3ba47609d7c
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/6d457e4de4a896bc4d8742a9c7a09c334f9f0fee1fd5e93133128889c326cb3891d7b7f774a01d1936717639bc5e84b7a3d6d39866cd6e689de78cecb5934f80
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/f55f1eca81cc38584c94a8db9d53b53f
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/70abc3cfdf7c94ba3cbc26aaee3328e28a5e19136bd659b619a6240d64d50029f94ae36e5ca4359caf1db79e13e6283cfd7b806e96fc3245586970edaf678a0b
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/e7370d074ce08d8de4aa6a34ba7f4efb
-LLD.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/5491fdf2d478abacad107a7de626d0f70388568b8f50732a91f30a93bc79a683d7acfb37a2ee9dda39f860fd9af144b322400fa0152f52041fec13a4ac01f511
-LLD.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/f75f229860bbaaf61d8ab8d89745d5b2
-LLD.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/046bd2cbf12a3d381821f87800635957bcb0bf37d55bf4a8046ca39fc652c2b98ba0f8497216f6c580b738db319932e8610c6215b020e993bffd129f730a4d9d
-LLD.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/21c591ea3725c02c5cc1ba861b435966
-LLD.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/600f590486acf10dbde10dcbfa728bd53152ee7134bbb68cc15f724017f8b5e50102a286ae7a836c197c974d34078ad8e8988cf97ef032ab97d9aeab96ae9994
-LLD.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/89770e2c5fbd1f36775081f37e8374db
-LLD.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/650e5fcac1f00c2ed576d9232df46325dfa2e283976ec84a7cc12c8127d532e3b4d758a736e5ca1efb5a09833f0369ab44b277469fb49d50ee14ddd9ebcf6a8d
-LLD.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/4e9b983fadd5fec3a2b76698fd24bbb4
-LLD.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/058bcf4a9cee81287d1f19e9bfe5a8d16ad7955fdf175ad0f7be6fb6fcb9177674b08a8fdc98b4365324e3c754c4b957aec24daa8c135974a2f2737a6054454b
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/3cf9661e272471213ed27d70a28245d5
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/4c2b7d02bfc73e2c36469b249bbdb9e6e695b47a464a71d5eca495fbd21fae35bbb260262771246f995ccb83ba1d89a5946d76cfb49de327f659d467ef3b2d93
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/432800b0bd1fa328c929f24765cc32cd
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/338053e5aa25b4cffb10ab2064b5e1e75ca628cfe006933bc125421406a00a1b84386a4ad027fca75211bba9860cdcf0d1c1e992752603ada97442d97cf17f20
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/067465123a0343a6c9d8d9cec1a6c3ee
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/895bc632e732086ac7676e9596c7a6ebc5f807fb49bd2cb542252aba76aa4faac8e7bc423681e3dd1556bac5fe5754a5e09490e2f97e40e0551263d021987290
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/27f10a51c07df6550e9945913b8f40be
-LLD.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/fc8fb0dba3aefa19098c3065cc0e83edabf9a3052691c6b3fac229c0b0bd605fa7062ad4f117220e51a6f6c15a0a6385cbdc8a2d8a0f46f96cd637fa134229de
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/5d5719e989de5cffc156955875e8ccc8
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/71ef0800633546b4945d460f51ee9e042bfcf4d0caecbd658033199ac82bcade9efe9749998396d42345c914621f658b3d7285c849f554d369acba8c7c75ed2a
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/416ed355013ac094d39cc8bd6d573917
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/a8d74edbf881b4f4212f22c01bc37e82dcbbe0e9399823604ed1ee78ab33a5cbac5e13933348878cd7cbac0077547af27cce80becbc5a2ebc6329306c86f45ba
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/68a609cb681b1fa2d7e8ad87ca31020e
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/6cba7cec86f914790194652fff53996458db01d26b322d2e76321273d425605d706d631141f2ae675bbc44712f3761fa2a90874de13a280fc2cdcc98eec6e0a3
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/a3e3ae95dc0227f6a1c596b42fd5d6dd
-LLD.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/e8a7e21ef3726353d15d1406cb76ce2e79de7d59244ee9c2dba6f2ca9255472dea28d9aee68d4d806f0a291b9293a3e3548f791c06b3461234efa36eac1ed356
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/1e43637c4e6ce02d7b2de3713c028067
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/820eee9280781fffe5bab8464294199389de9a6000cbdb2e3f8ae3d2c474ee176442b128436cc6edb414eda06ebbccebc4277b3d0b6b4a7a0d4d232e18593eb8
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/a61866ddb1da3130bc2c58426aee6b05
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/a193e68d3ffd1794d0107d4f003ba0ad5f5e3e72fcad1b6652c5487cbad57d9357311c8f14a8a40c2c7384042d768c86ba17f0ee5fbc3439435224273ed2cd3e
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/e72ad283df0a290f1eab61a2037031ad
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/99d04f3f27fde19d1e61821dbc3d1748e6687899e46206c99442fa9b7086f8ade1199c67f16cfc5be40bbfe37da602508441a5269ea741186a79ea1d221a58c6
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/bd08e53b82b168fbab8d97e9460ab9b0
-LLD.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/408557a99ba6c794eb8de0b0dcca04662a079f6af2e7a663075b8f18eb422006235650eadf32c3abde662f12f87019cd73c5ae759207dc11101d3f1c3b8e2d11
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/df3bb4b61144f94e9ca0ffad461fa69f
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/e9ca992cd148026fccfe47b1e8df1bb8b60e7e44341d664a3447c06030dccf2a95ffd281b9c7346524cf87daf4e72ef7defcc1233a3b5adc228adb5b20d5d911
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/3b6879510e198560d65d29f6cf238b5b
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/75a8e70ec5071fd41031ca96e1560f4471b6e0c45ac361d10f11e41c9e63ed41e850170f5b97cf305d0e28ac695b8a86d8d777c7a3582f6aaa7953c00a666fef
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/03eca3b921064050532b59049d2da878
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/b7ee2af9440fdabe72005b84e05d05c28f2736135df63859f8d6ef7a10f622122d3f2d0a393ddcb39bde21ea8fbcba4a99a46b5568e42dbff2a505a5cda79e94
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/cf9ce84e5631259776238a11c3528901
-LLD.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/b64f0b0e2b3180b569528479265f15ba2e44523dc7103af3daf82ef6b9087c2859bc378d51abf44ba10c6e10a9aac4498b43797d59ef3423de1f652beaf8b6a9
-LLD.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/a85df5a842c1ad1e9db959fe8fcc39fc
-LLD.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/9f037dad3faead898854235ed48804607e19a68e4a34a28e1ea09258bda9b06c1be88de4595bb50b43234e438353db00f47dacfa0754d88e8fce01b637808f47
-LLD.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/46191b2a25020871c8c183d6f62ad477
-LLD.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/aefc623430f79050ef367edc394c72a09bfb4ec39c6ae4e31775d378d1169432695be1fef5bd93737502465363571c695f7a0a7bbcc78d78c8689a0a6b0e280a
-LLD.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/5f034672f90e88a30ced0ffa0880e8af
-LLD.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/fc8a6fe2f4e1a6ccca3744a4cc51be62ad2c849e9ae942e9c0452aada0303e95b376b55c951e8ffc22b65e391bbb182c977c979a8c50b50c1426cf84ca8073e4
-LLD.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/157f2faca0641168becea6b1493e4c36
-LLD.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/6981bebe07fba76f18e38f30aafcbf20325dd5a3f158ad732ce1d4f153839eb45966e707e0cdd008e8e739454f10f8dba0a372b0e67f7e753ed3e84ec47d4003
-LLD.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/de902268f693954466d62a154a2ac490
-LLD.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/57eb929a0b21d047bf2a1da287fe2038fc9cd26bab43b013ad8794abaeb387a5be0eb7d4f9ece52171609d9b74f1aa743c6cdbdbc1eb78b425e42a1ffc862133
-LLD.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/d11bbc63ca2635836f5b95283f6f93ac
-LLD.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/e8b9c2383f71be7daa2afbb0a6dfd6487c832673b3272b2759e139299491374c2c3e8ff6db71c526dc0e71901fcf81fcf77db4fcb9351dc1dab0315389cb19b7
-LLD.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/8ad1aaa3ce1bd9b3aee584b6494ba6b7
-LLD.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/411d563c9856cde0834b4f035551cf1b0cb0ed853365f312889c8d85ff6e52f5470e7e9af5146061516439ad109945caf651209153db1f6671a4cb31b69abfa1
-LLD.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/aeb833592bda8c92781a9417daac1c7f
-LLD.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/79134510ab99d8478b80a9a14dafaa818320b9f823c024260b6f8a82ea7ed406424f35fc9882e4590a98565098a949f8ad01fe03aea2b8146aa22827a7dd710a
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/db979fa556737823f4629c1d19d45adb
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/1790dd2098a07a31d3e3057257da0bb9d54dd71ee713569f9d15d35895feb143e22435eb1581d72922ff09ac5b733e0a3053aaeb2f31483e4441d7ee12bdffb9
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/63811a8ee9ec2915aafbff884e8ceef5
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/b8f6400246382ab9288bbc0b4a50cbb9264a8c0e2e3e9695580577df8460b7de015a0628ac92bc54ffa14bc7c03c86ee1e52d032d423120d4c5c731b8ff07ae8
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/29e840a97982441b492bb6e9e30b369e
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/9a7477279f129c4c500694e78b0f587285e79adcad167da212e68c46a22c8456ef41c721d8332c7f101958cbc3ac055414fdec935e0980fe2d0d507b1bed736e
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/97bfb5e23744e57e243a9b0d3fe4252b
-LLD.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/394db3828de239afa47f6896f0014900146bf8e5ecb90014601aab86fa95dba434a78a8590ebc588d3a22b93ff6481587c7c3458dda751114a7a4b41e93a9a72
-LLD.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/9dd9fdabdb07a2d25270cd936e8ceb47
-LLD.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/ffd59d483f969dee28e32166a8fe091a5ecfbb56d6c17d44c732f6f8608b14d29b4e3874d93ec2dc8563b9a4dabd61418836e6dd389aa0c38a779f527becf48a
-LLD.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/md5/574c37b28031d6332075b7ce3e4b8520
-LLD.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/sha512/f09afd7098a56ef282ef364c59a2f91db51165d9ffbcbe63f70f68999c7bf67d6ee445dfde789be79c81e173db346702d983e03fe1ca51d2a2aa3cfd9b9e9e00
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/7bfb0b6c0ce4b271f3c0e7cfca20ce79
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/66daf7d38f206d428e27227bc1218bb8fe32abdc50246ba6e49ec1494c48b5217697a69e3bff51b3510a4383e2ee743a8a0ad11aedbaa895ce8a56016b5d7693
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/f2666afb8179d10cabe3bf9e739a0e2f
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/fa59a23c4b24c444173454da85e77ae4a9aa73ab91b64efe7a6aabfe21c92e4909ec61b7b848d4b0190eb5e4ebaf0d55f8fc0d92cedc6ede76add230b8e6caa2
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/292e0354262b845ab359adf298aecc6e
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/654331656feb9c4fc27a5a06c838ffaa10ee7f74ee7eb98496e9d8d0882ac2416cb8662b5ac70968d6e8277ff675200a19c56989c120caa83170707c409e0cf1
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/c0b864d0d7a830627cf0feab502eec81
-LLD.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/72404a586c46b62b0317080b925ff5fd2ea99212df50fa6631bdd2f01bf805bbc8afb2e49bde16a4b8ee7896af4d58237326cb83aa064e98509e6f4f0fff46b1
-LLD.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/942a23af6c179c2715f2bba2048fa125
-LLD.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/101a6ada3ed831dbb2f580185de732b90b81ce1b6ba544fc1ace40857fb471e61901a782f467a2077b2e34952997b8d04c841aa4e9f190e1488ce37c5f6ed32d
-LLD.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/2f0aa6b5220213d549a2430a439094da
-LLD.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/180f0a520fc9089ce39ae8f032d7b415442c2ce6bc9a44bc427ae98000be55d0eba6278db1e89d45e3c79c907a75264bc8185cea088d002aa9557fe1b047b42b
-LLD.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/6206795db1db9f9a86346ace421fa778
-LLD.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/b86c57b2561cd8fbd4eb0158b0e3f4064cbc55936546da6dad794818eb93f51d80fac1dd094b2281ed6125416a205010e2edb801fc347db8d02d93fbc336d878
-LLD.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/e07b22829c2e8d4efdf8c41584a3cc67
-LLD.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/4695f44b9a1b21684575f58dc66644907f7fd5250db42da2cfa55d90a4d5dbafc9cf37698750f8faac45ec38dff54eb382288052b7c95e82bfc60a10136ae5d2
-LLD.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/md5/3e183403117e11f6c4b9059fb32e4acf
-LLD.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/sha512/34f6fb23cc234dee826a303a8ce3bf17ddf945c2ee9a75fca4f6c625379901d5fbc4d5d9006b63d95d982de852fa1594877cdbc9451b0ca04ecac72371d1647b
-LLD.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/md5/40b50d3ba65160eb842bc43241eca5e7
-LLD.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/sha512/8cb54d63bcfa0ead6b8f8583e3e30ed676df4e8042b8a40f94aa443b1de211cab71ba4ab36ae804469876511641aeb5cd29e1843adda9e09e7b7e30a936c12cf
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/d999ad0a4c62fe11037ceed768cf8cb7
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/3c43e7eabe62f5a71d9b2411def56d5357a23ae50b639bb117eb795101f10ee896e886040db0f57c527873f07d68b49c8eb6f64a362419ba4d6ff9fbd2ecd9e3
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/c2a7d97bc3b45591329284f55a821c26
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/d53f16798e8d19359ee6c86e9f55135838b2db78d2e69a2b0d01c92087b9bf1195d7cdcc9e2eb5c29debe02048af6b2d7dd83c0207600143c64b5dd8be647ecb
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/0a6eb0fb8f794e4ab1ffa0ca94e69968
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/9d1b5de37206ce411db00587a0d9dbb3d57c186ef84d2d60d20dc0c7718621bdf01dbf090ac1d2e63eec595e55fc39d9787d038766dbc0b4c49708e1b16bf09e
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/9b2f4c2988b177ac0928219406d5aa21
-LLD.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/199bac9c28bb919eb1caef1eeeb5a935183a134be3def03f401794a2241b05d62468ee9ba12836d07bbcac508304c50c4c7f34d108fcb505a69a46a0eb89c6d3
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/64c9a9f1758b9b292e0a3ef37f16ea41
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/cc740aaeb6ed29c56b2881e1488606338e4bd0e049ca4a5b8312b1d9129b778224570336698347e4562d632db9049e0e91ecce34ef68acb23a8bbf62455a81cc
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/1a8e11dba5cb574cde42de2b9703ff79
+LLD.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/290300229576bb9155fe6bd24c0ee21beb41d0f2a46b208ab5a657b0199a7376c1f4cb07204c8ee1e6d202efe30ca040a6fff63c69b174120de3eb9866e344f4
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/cea134f347bae257cf5f55b6388cef81
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/16b59143e929791b0c3e56cfb4970d8b3c87adf6e847fa9e2aac17c4ff2aa311ba2c7511c1b0ae2f39d9aa92f87ad4d99c042fe35bec391ac865fedb72bd3b1e
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/5f903bab0e38fa608e8965acce6f020e
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/01e5f6a32958e04174c545f57c6c3b1bc88ccfd5ab18dcb9d67b92b55ebc7655a03bf963c4eaf7e5c3792d4691427a89db372e7534c6c8f965f8a715a32d9284
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/241a55374fd067f3736a2bb929e47015
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/f1fedea4e6b5f6f3bbf4d705034d6c51b06f011c2ecec1ae49c5b7bd123891eee8b991462d60be7fffd58f7c773afe910a06ec0b55b37eed9b4d09b9fdbd5068
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/ff018c7448a7589935333e46739ee2c4
+LLD.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/b646c6a945b8f42b396164a8e87fc2e54b1ad05681f438dfba83fdd3712a60167aaffcb0300bc42d904eb4bd34c002a71642b59540ca01e64d6fecc6daaacdd8
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/e6ee9423a82322b9233cafb1c92eed2d
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/c915582a9ce2dfa8721741fb1ed19b719ba40f0092c2d29ebd68829ee558cef0b044a5e40985cff88e89129cbeed052d85fa5c6b6d87f9b3a68a6e89079ab4f3
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/cc55112e2db358cf26d7bae3211d8e4f
+LLD.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/0ecb43045419020eea911f1767dae23a6b1e81bb155ec493e911a9412e45f7ec71461aea2e6fe346e641747139cae43d9435ccecaa7fd6a234e4d69bb06606ed
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/498b2909f80b20588135466d5211bc80
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/120fff24e85cf970670b20b5f4509475a3ae0d7621f8f67d018f3a7625548d736a3abc89f015966b1329c6b0a08a1db832e035ee3bae384e2c5864b73a856600
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/1bcd298d5292f8e51f19b97fa4b27ab0
+LLD.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/695c42557f9ee53b2e10bbf74653fbad4d02124b962a1f50cf719d2821607dfbb9c1bf638dbbc9e0e544f3020a9ef4a82decd13f886cc41ddf47c07a5e40eaa1
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/2323ff933feaf3754b442bee48a63607
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/47b8e490b89e04fb8886dae438e3ddcd53c4e98045de2b0def3988671827528c8e9ae296411464c0f17cc64bd3956644673f47a3817237f27e1c3ed16ac8ef01
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/37cf8528666064a434296f2e0039e9c6
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/ea1504a859509f8a16030db7a65f42f0e78d67adf5946497f2178bf25456c0f2583af72c636881a4bdd1210dc0d377bdf300ef55aef5db8c56995424a1886059
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/1c341f2b161e2320d3d1a74685887f54
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/4f6fc099293deb1a2cf729ea7edd6e17fea0dc8b9fae9acfe34e00b1f5c798933df9538c805c8d28c6279eb38f9ebae2a1aeb1a2f23087352c6eeb3b27b63ddc
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/e306d59c71b0958c77108e650fac2612
+LLD.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/79fd7cec0e169a9555ec9b0acc3248991e2e37a1d5bb422808ffcfd4f47e79321560b7985c82dfe070fb0b5ded5c160d83e358399c6e7608eeb62cd4a1406f88
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/c1d080f1aebb58778d730578fb113290
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/1f420da1897bd0a61413321aaaf032e8ed38d59e6dfe3313ca3a6ee6582ae6c566e3761ca8fcd1f5a964337ba8a9b3e73dc55ad68aca139beeb45e43d51e862b
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/6f4e0c7d2fe9ac254650dcd2842dafa8
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/bbc71b334250e5e6429766d88595adbb671a206630987ec2a27e05711ff0f844487dffc1c136052ec11394e9d5c51c70d1b75d5348f97d3bf7fab463291e9dc8
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/76925b9a7bc249b2227390c479c54f8d
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/20643ecb79732e3ae9666116dbd0763c18b808afa78e6a14998aadc7265cccd6efd28670592db61d3d27b8d3023be4c5f3df41fff9e1b38d61abf76829090b4f
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/399b9aac140d9050088fdb187ed4645f
+LLD.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/8bab65965670fe392e78d0b9dc78c92cdcf202898f6d5a3174eb89ca5cb95b995675c8a7d81bbc4e95e490ad1a43d9d29d7907b7006789c0143a1d8f24cccaeb
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/026a4f5ae9eb3ac05e5e8fa894d77a5b
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/4bca8bd558619260cddf4e2f4593cbb2a0691b5ccc6d1dea6dfcc5a2b5f51d7d1a76c35e481244e211e2eacf32bd628df5ad0e6c75e5185bb1d9b569f6acbfd3
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/f898ceabcba052b7e6713a2b2c208a92
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/92be1910f795390be5f15ba5b2c220a3209a5f7ac04fca3f5229486628bcf5d2f20cf6e4dda8b41d6beaaff42a68a9ddb95fdacc6eae33b9183b581e9a194895
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/e366058cf69a4367945bdba9523f2a0b
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/45a786e8d0162bd5bd01c029691d2928d3744ef4a7a1efc2e39755dee2f9a9ae23ee725f0454ca601cb9c082a342209e9129df851314b5757c74767b13508fc4
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/665a8502170729c86ea95a7ea2fcce0f
+LLD.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/c1a2a85c9ce14af8c91bc9a599393c52c0b8a585057366b1ceeed34c5db44641ecd0c9b377bee80cb4951fc7102fbb4f21fd050126bfa5bb4e582ffefee17035
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/b90b2130262f63f5189cc8e4a65e4433
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c1cbfd38c82d676c3fdbec486691334cf7bf4115d9ef2665012b71725c28545a49f34edf5689ea0352822c811c24c89cc152d1fccd1586b17ae8e6b2503641df
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/2d5360da4b2c9ffcea5d0a646a7c114b
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/73323e0937fe4423883480294c8df44744acde4f47380e35535cbe69c855c0e35e86a1eced3085ae0285f284f47af5ef237f4650bf2b6a8b9d5308efce88fa02
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/a9b9a65938a7701aaac6fa706481c867
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/fe8243aa131ad8be54f0fca5754c2e68ec39049004ec8feed499731c5228a7a46e303ba866b9f9a55e5318c73d8a46d964673e111f6c60e5ae1628c568d7d894
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/0d9592a287c9231ae2db65000be2cea2
+LLD.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/4ee192dd33f518d2735a829ac8f822b5672b39e8c2254987aea6e5f2f0056213bd85d84c4050d52ba9ac8c35762521c324fe2d6e18db0396e7142af9cb61a561
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/d487598dec9969485dcf785fc0968bd4
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/8d3117739919696b9b0c9ae398f1b1e9db8bd3e2e27839f62b3551c22ae2517f8abb69e57e23d125002bb466889b7352e69c1e9dfd9abf1c5433f274e928b341
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/943434b08dffb54e8cf04ae7bee34923
+LLD.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/77b7bbc5d988cf36ecd10609e091cf24dea134cd32c7ee96dec7bfe1a4942553b6205653edc16c8454261f621966daeb267f42562172bab4cec9693ad733d867
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/cb9e371947ad415de048636ed78ca48f
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/c00b696fa146e8c29b37f15f78ab3325db9b3f5b3514e615f145b4eb7c9c8788662cfb6255b7dead596cad8c576b378f7459c2c85d462b597ba5e21adbac0536
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/485f061ee8425f042e4dd3042388bf8a
+LLD.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/845a47a36c61b305bb70b1249f6fb7c4e8f740acff90d3e850ab2e887f7d959ae263431a02305bf7587e4194463f9932769d500a19709bc479eb6e6168325421
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/f234526410e779188f3d22da438a4926
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/12e2c9fc5385ff142bf82956268230fb01a6f1a1fdb3a6c1e13afd341dd2eea970b707168d5f45960dc9ebbf6d6598af0ceba371172f624ae823ea1eef4e9031
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/e68cab4aec1abcfce12a13e3d1f67dac
+LLD.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/67755b34ebe04f4d28be3be2a37df46b5e900f38dc4908875f66671fbb740cf033f2fd9af5116635f55025f330f7b1a478cd4900db9d00e4699b591a16269100
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/4a71aef80b75b2ea1a5b7f8521afcf5f
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/9deb3e9615ae15dba8c744b22416243304d30f100c8d17538fcedbc18787147505f74ecc2f933fc54101527847503142cfe84a46a95ca3c57987996e3b8583f1
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/9b28ee75d05cbaabff57fd45cc0d1cf7
+LLD.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/bfd3d6cfd4a5a2fbfe940f64d47a86a598360e90619f8175a2d1306f0894610f13fc44ba099ad59d2989beabf491df08a5611bcef3fd61b6391ea0230b11a432
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/7962fc6f08531f0dcfa44bd667f31582
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/2c936064685f12ed6764c187192023118e97dcbff6ca1656f0304a40772b4ecf55ee0296b3c2a00760f5bb437162e2b737635fdd59b889d35756d697fc7e6b72
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/3eb4d78af670d446f696449a5e71e3ba
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/315dc76799f3e443fdb5ebbecf96a08070f8251930a26995de892b8e67bd35bbb365f2cc5fd93bc7cbcbc9edd08280ee8d2a36b28a704866dd3fdddae4969455
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/e73cadd0354897bd5bb611cc1c027858
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6f444a4ea22e7108ab75686ce9cd78c0db0a677e39e8434896fb1ec90b9dc013abf7de1024d329a9726dabf229a8a68c27a11f211092e676715d282efb7b8504
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/aeb310f106f31126dbe53459e36d33bd
+LLD.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/cd18c115415dd92bc7fbb5c29cacc5848b1f3851c3a526ff9c0813ad46824df0a4f13a66b1e6641ed11b44b5b937390619f01666fe6d5a047f1772f0ad03c941
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/300dc28f7af6aaa69cec9a214a57fdb8
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/dcb40c5934118c204968cb963a3fae91179eb1e31f5397975ca98c8a7aaecaf2a7f81847bc426fd306bb76970794502ed4f94d8f461b96ea90362130f44520e7
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/e1f23fef82fbfcbc28899677f12658b3
+LLD.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/b6b585060832d53827376ac6c00cc8bd5dfbf091c38c87020f6de42adc86dbe4fc33ec2c17f4433176c79a509681d694ed1502b179efcee2c4dd4c10a26e87a2
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/5dc96eef71dc28611bc998ef966371c6
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/781993c75bb07db96d02b5a7e779116864730a9bb941b64420a435956a7ecd501b5b2673f1854c09ece5f0c73559d5723c271d6352be57ddae6801a695629362
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/8a1fe0ccf7699ab7a7a514b620112a70
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/d002083045d3eb7c749f2e97527c1228cd317a8138ff254228e43594a6cabee47fa363785466ebc2874cc438457640ff08a836eec7334afac451506ea7bbed03
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/331be92bd3d76bb8e86991b7832ad41a
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/7b1c6df53311a17a92a41cb67ec476f947949c4ca5d15a643badaf9f01e76a186abbb6e156f95ad1605d83250df4e081164986a6b7fcb3238076b0ec5a3bb565
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/97c7f5267ad6927f699a25ce44f55a70
+LLD.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/7b847c6026fd7daeb17a4459b852562ce6664b2f406664be672bcc384dd5a79b9505561fc61dd8fb78a903a2ed4978f322cccad19f5a3966bac856e877c11ef7
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/c86da6a396fcdddbd26cfd71c0f70458
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/8d5b75b43167080b8ea456e516c9ace02ee6066ce715a56f0b42cb8045b965b1cf8d4ebc0786c23be4544693ff858816a6257b0638ec11e077df32ead62f7efb
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/d72e175272ed893688d18e868120c575
+LLD.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/9a46eeca8c7a8be65ed487a74227534e08a257e404814c44730f12a5bebc8cd160998cfd5ed30189aa606ddbe602e1b1788e465e4a210103c6726a7fd230abc3
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/0206fdaa9582ae3bddaed1b6fd7a8cb5
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/584a67f603f656ca5d27aa0ef2e425ad385612aff06cdc1d534b5944939a09246c93954fc153b8a89acff721e657a8903af9a640abc252d4e452f348781bca98
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/0dd14af342467eac2e13cad4acbc881f
+LLD.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/918f2c66898f828414009fa6ee273da5bd654e4b787ebb4d703f0be27e388b46870d68bd58c4f45638d276c61c1bfe2f3c67fbf34dfb5578158d072f82d927de
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/b373e1bf2a24f34496754438e563a5e9
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/a739f29e332be74cbcc544903d08bbcc12c3e9f5c3d02d130ef1c69426ead2c74b14f98ac79e88ba29fb2e2dc3b28b7d81c9d42f2e27e0ce9442f6a0e81bb8f0
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/1fdbf6aa0751788611054f7e98024104
+LLD.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/2015b8e84642b2434d1089908354b47b080d5683f1c7eb2c09de09abb3674e7119ce4ecfa858bf8129fd9e9075bb45f2e53a997421f2457aa9b5c4a9d7edfec8
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/85bd5a9e312e83a09fa5b7fd6abfba76
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/0a5cba5c65abc72361a780f64e64c463797aefe52994699d8d785437b773530e49a5fc2746e36bc5a31aabf4eb4343870aa448f8fa2b119ede3e1c4ea228cc9d
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/ab07ed76a796d86cb6ac2ae4fc563eab
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/485117c7e1daca401c1cfe77324e8f5961f6f33ed2a3c907f4c4a2bf9c45c14d929959cf8e4d9cca9ad112a3ce6a851e336cd793bd5ee284c87b9fe487700ecb
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/852449a26af61d8554fb1b4c22c4384a
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/a080d2da5ff4b832822e099f150f0c15b985d54678a9508711f7f435d6ceec68eba12b5f8c25db0b4841dc5c5edb003b74b4fef391292b9407d7bda73d35c4f5
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/eb999bcb67f789b6443dbfe907bc61e4
+LLD.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/811f72ce250184ccdfa30aa992884f1bdd0a791fa125f089037bf1af45b844d76807c5662a904ec9312b79efc565fd0957f195a70a39248eed99ff53f3284cba
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
index 22d86ec2e009d..122aeb9a53337 100644
--- a/deps/checksums/llvm
+++ b/deps/checksums/llvm
@@ -1,111 +1,111 @@
-LLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/db3b242aac30d823cd911ae3424b3d7e
-LLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/5efe8bad36e748ca1883b23f5eb6da416ebea77f42232e2de9268d9aa92af868b2a20b3c9062f77e734a58ae607b4dfb134e2f8c7879652cc643802379296196
-LLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/md5/640e9260e4b4235b9497720bced5330e
-LLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/sha512/b7d193821ce8de809ec4c3d035ea1cc9b889e2c22ba0d263d25b8c01474a6a353abc9c9b8bf8feef686f00cd6055f06f20cb3f5b85299cc9764fe75c7b3fb21c
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/898954f176232716590f3c885af8c824
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/8251dcc31ca7d2c827ad73668d9e422f777a219d444e9e00c0a0e16b895ffaceb266701938f630d4f354e3ded736061be69d3398f3810dc0df5b803a508bebc1
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/860b7b3b26e3fa5ff2d1c77d087cbbc0
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/4c4dcb47f952b87ea506e6c1cb07e1b978dee4aef14381e78ea389de728d49b351c401b079790a1d1f8145853840d019d02a906a39f3c1edb705c8cf663728f8
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/9cece7e656a511fc8f47fdca094a2695
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/9f918e9a4b552911b58ff43e829acc8355904fd7f050a25ba9e08437b8f7585d48bcd842c79bef756d2afe83fd0a690331b5cb51fd55a2bc4bab7e3c615987e7
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/66fe8bb24feb2779828432e0d996381f
-LLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/090adde3f779fd56815992971a532dda793d3ce4462497daff66c065822834821eb6a0b9575f94241a760cffe4cfdce8b7e8a2b1acdefcbb62fa9db55748ef71
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/b3a1eab6dff60e4c9dc770ef1d6d108f
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/eacc5a3fc376359e27ff795f63aaca0d1cb3c57ca0dcb27e9a43887dbcb6a2923b1e9895ef0c0fdd3545b7edb5cac8598515f464c27f5eda25dc65425f5e0c50
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/047ea0fe65332c30dda16799e7999185
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/c62949b9e4ec97f372d3d9fe7bceae923d54db5afbc0902d74b7ebac66dcc1de7d7ba12b10b5028aa1489166cf06b30fc2610ad84bee9cac95b08bc7b7af7f05
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/0fe165f62c2fee6dc0fd3167c6da9ecb
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/9bbdf63b00ca88afea787069da2c64ea6a9478c2f929bb540fec5d2664c19249ea63a5b69d2e703605ff5bde28a029e5bcde1470e859abdb1f0cc8da960065d3
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/0f07d3c14e1bdc435a9c69431b354c51
-LLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/f3f572dd805bfb14713a6f4ce79e91c3c9b2fd7008ce9f4ab1e2a55f0e88b192f48beb6ad3954b531261ecdcfbe8d26cbf931057e43e472ea244beae87cfbfbf
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/e7b10dd01bcf10e9c3100c4edb5b127a
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/a86a213de97ad709fa2b0afd7a2103f58c5292bd9e1f939dd79fe02574065642b67823a0b05f9cdb054c5b6546b3051873c67c93bad57c0f53b8327285ef97f7
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/480cfee009635bfb78c164fa10745e36
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/414af42a442224f780eb6e6455483a6a210250d28040138998ef01c57c63176aa77626ba8b9985ad9ca9a397fac5c1a8abbe0e173a585d218de8b036cec96f67
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/1b6e3e6fcf6b20e824aea1b3b510f989
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/7370730b97875e1f6b8b3576d68d0d4a3112ff7be66c8f04901f905938cdb495b67f4676f03c1e6f0e4f9612e2849d7a9f02a932bb66354d007c2e4cfb8fad99
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/24bad94fd9ce261880ff589829dbeabc
-LLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/cd52c2c036cdc7d986731b76324c0d241fea9d9deb3a94963b5efa01a1649042c0ec062e2cfc6c27bc747637ace09106238d07aab0df84c448755a61571e6a68
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/78273dd81559e0812eb497c91bec8279
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/1b28a9f2300d4777406d125eee8fd8217dab7ee67db38334cb0f51a749f27baa370d6229c4396f12a63dae210c0d772b83d79feeeb6dc17f19184832eb15859f
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/3a88701757a155ba35cf911b1ad6528b
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/aca7a33d28e1ddeac19d9c99fd2ab81268a23d2523df7d300c8d1999481902f7aced3e4615d40bc7257d38d10867a8440d9811db81300db1784b2515c3bc8fa5
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/3be9234f6718d0684a40fe089af08503
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/9acd3d69ea99f4e660978d830ead8253f8623d6fb66aad0c79db211f975f42b58254b7f47e284e751c94609934c6895b4a2e662fa0e8ac8835cce1b60e653ff4
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/583608958fe5159e105f349f73bf9a40
-LLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/cc574e3e9f457194b4ca01e63a6a55d46f72780833b496f570dc5b7c1d85d751f02093da95061162a8d66b0118a1cde677751b32e53261ac0272d540925fcf0e
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/3318c8aced1cd7dfae6cc5722c7ff590
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/a202552fcd12c6379cda41a235be1eab46b7ee2c28b4cd9c9ebc581a5ad2eed3f0c54d7ed861c9045272ec04d598707877b6b831c5109b12b4d2de80ac53bd2f
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/a5718bc977c437d3268a8e4681d2adce
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/45d053db2325ff637edcebb60167e948f42b7a6e8e6dbb14b2b0785459e06667ee42c1e01b13ed9ef09b51fefb0bbdd861ad59a3320f76986fec60129f937cbe
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/53f2205312d55810fdbc1fec961d3560
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/bb80a8003bdaef7da016138f99e8c53223642a9490c01b55ded0d02f6dd69c8460df11bfde7cbbc5ba1b3c331e2d2e6a8c6f7ca83aba20112bc0f56ee907802c
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/737492de94fb15e377789aafd66e817b
-LLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/2d31da6d69deb859513ae6db6e3fc5c2bf3703648301b1c520154b6f3aa0eca12c8290026643384e40b261755d8c3061c02d685dc53f5c14104498657f52d167
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/99a0eac6aabafb1edf3bff32c283dcbc
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/f65a01d75c3330299584f8eec1d25f54d89162e5359a7dfe0f73de76aaa371751d3e5008a0dedf5de55ac0f29613f0f2ee1d9dbb7f69871dd0e85e047b44b28e
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/b8947735e7e7f9687c9fd97c72681e9e
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/a6c699b0994a132867071f6cabffdd6bfdb51eb4fe7e5762e836d420aa7d61a5165d899037df2470e9cdb04bd64c4c153ff83c49e30f42a120fd5b5090ea4fe6
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/fc3143ac4c492cc2041d73556a84897b
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/3e7f3281b0fee548a4135893be999456c39aaba04644f4bb55247d514257b3700d36fed376fb1ff7b3ef5546a43f791b202a2c1ddf6f924a63730c56254eeb8c
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/38f9a63bd7b6275e4cec5fb05d2ac163
-LLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/b7a131e2d1838cc3dfeca08fa8f6b329ca714e909d35bc6429952e690743a321b8f2e242f358adfe719302742015306e3cdc1ac0ce5be613a36447e53292f9d3
-LLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/ef301dbee02ae00dfb6f3cdd9e67c714
-LLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/944849a1261b3ac3a5141fad55397a0de423a73f57ec6b8d725ef09b3233fa6bd6e76be4c8c4e77c80e3b60bb3ab135d8adb30760fec1d21eb699aa1868f9a09
-LLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/1777a48a9188895ed07163df4594a688
-LLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/e5226a9b3f87c63b32e69e43f96ca07c32b34742955c5e608be7843dc1c003b75e065ed5ab29c2a4144607df23d97852382d7fe7e25f98abaca2493200ee8cdb
-LLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/e8b3d76ed47923cef55a71033f78ceeb
-LLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/88825e210479ba2504892af2dd01a3f0a1488f05ec5df6b352fbe0d6d8bcc691fab0e3d3b31aad2b14eef860dd8ef5bf56e83647583e7c3fde847ec7a0a651cc
-LLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/ad634b98e24aa296087aaf11e6d70828
-LLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/97bb219361efc3aa847fe53fc20669821466aff399ac47bb312af7d0899bc6af0533a0e4b490d74a0fbb3f5e3d0b91af23023109fc9a0138ef56630055e5792d
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/cbb4bed17e57832bd99affd1a01c5e9b
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/948dcfcb18eb1288f4d66f9d9a7c7b234956cb8bf87d6b52b7d639dac60360e4a3b9c808a908b2d298adebaa9b6e63d8df78c57a1b0ade613fe0da0921d07807
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/c800bc4f0f0ddc5d2626512a949d9b79
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/4e0c9091d580d8d8bce9d5ee63beccef52dbe4428d29fe5315891ce63a0115f4226029bdd68612d813829ef49f0c6db9b5681c83cd5bc70ded55541466296cca
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/4439c5a18ff9c3bda01aa989b5d7a1ec
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/5d17b2da2d2a218ecd866d9164527bf4762090effc908fb5b93b8f4125518e2925ca65b5e96210b3e2f77750f2a3e3c2edde8ccde3ab779561191ce7eab294d4
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/9544f9d46eefbf593ad8b665a1cd73c6
-LLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/8baf49c6495221ac0d0a039bf7988fd4890cb750519b0e2e4b44e8dd504c417ff9635d66d43ecef75e3d0d397b1e47aad010fb91782286b1c26f7785667d58c8
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/8f46834c72ac617d41befe0abfec29a0
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/c131d322d45a95c74ad01bd35c094b5e30a47535c6cf5cd7c2751b99f8942c1cf0c27c6039b33050a4e140ef856d79c5878a81fb98db0751d43b6e463070dcc6
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/14f97547b230bb2774bd1bcbc9e44fcc
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/ff1d8ed1a479409b3bc611cef9d6429bb7f1118972340ecf7c5b8cf6db28a6cc79778e800523c655c334496d3460a93d6c0f42a1488dea58d8c2416afd0725d2
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/6cf4a661be00b3d007c67f7af80b9cc2
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/497e853d4b06c64a61de09a1216ac612ee0ae4a3bc8f01ee9b919a82c82706bc045f0a641c5a69547b68a36597c2be8f699f711f60d5af5fbf290421a4e76816
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/4e8db53272b73e5bf7c21413cebea308
-LLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/184c1cf3e36efdd9da14f6e3decd015268f1a0a9157b9c677877f600bcb4c9d20a7f37d6fb830b22012bb7cf05d7af86ff97dce046900c1a9a90c49b0a6c3bda
-LLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/c06ecd1d4ab8e4fd73c6cba18bee9750
-LLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/eee12b4e551d21ad35f780ea8f3ace4b43efede66297e6941da78ffc768fb28c31a1f7e1ec5e11495d95d456ff2a929400984282d2f2230c55ebeea8005c352c
-LLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/md5/72975355fb31f13f86be0afa188dc436
-LLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/sha512/9a82c8148db2c67c50255c70fbaa3c6cb60fb6bea700357a39ff5e59dc1fb615a13c647331b494244b88001b525d4cd61035705b2ce0e6590b8894797efcfbe1
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/04820df9e180b7008598eeebe49bb981
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/ad5fd5d96f22d67e3ada73b8f0deecff2df212a8e8762d4acdede62564f379496351659232fc5450d23b4e3b8a55af95f48725841233a6a2448d18d3fdc1adfb
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/59af89fcc77a0eecd7c9e8cf812f035a
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/a5c486b44cbfb41ef0623c067e42cfbc5216cf2c09f537b8b792aba814612ded170e6a99a11bdaf9f3790d4af06f7f058b6e7a66b2f8513a8522044a83007a11
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/b404077eea756a6c5c7bca8a54898680
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/70c562d603aa974b8143fe72ce000be60fbd975cbeee4ebd0bf29bb01bae2dcb8006e3774581d1e37fe221ea0432067d06a19e5fad963977990fdb5f1c0648e2
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/1437271560c205407a4a1d72fa3d0ce5
-LLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/463b2beede8289d4b6d3a39795fcf8e2c6007b321abdd7125cd56e2f960fb2b9c3f9e27e38cc23ccfa51ab0f9f42378f64ab8d5dd28a2ba452a3111efbd46c99
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/df1353b2f06a17506ef20e0315ac8396
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/79a696d86252b23dbc439284f8a2dbab5da77af2541680c569cfa11bc2b4c9ac03218b6a307841ae92aedbcad88573e800ad5fc8bb1c55b957a837a7e53da592
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/92b15d83ee43db8522be4b4d94b8aff2
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/ace2323208f16a9c53823c0ea8ad2b910d416d834e0731485008e4607ff8ed8cf566509172f8f65a8add0e7a7dd7391704246cc024eb3d40bee73f043481decb
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/69c1c0f5f416d00a3237bf62e6c14075
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/733cfafec656ac3ccb51dd444f5df028062bd1b3182463402105ae78f13675dd0dcdb79e1eacb5222d0942baaa85e487fffa01018c5c19f0f971ca96315f8073
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/8192a6688c46ca92ca1706fec271856d
-LLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/49d602a869ba34d560966f00bf575a04516476e9c147219ef7086842326bcf88377e2c601ae22fbbe56f2a12f40d4a9611259825bfcddf05608e73b2ceb363c9
-LLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/md5/64b59e6cf3c0d8cf09ca4d376b298134
-LLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/sha512/19a1a17ad7e95ffc75cecd28be4190ed527ca5ad948995e9f3491770e010c45ec79bec52d270abeab25c96846f604b694944f2eb4dc0697f43cf4e9f86f1efb9
-LLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/md5/3221c8c4773ada18489016d3167ce509
-LLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/sha512/514a6cfbca973e95cfbbb880323056fc6b51dd373a6de26e9628801f393e42011b4c21750d0276e562ed09fcc09e9751572bb9c4f362b8972411fca3a93c8f7b
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/03744874a951f038349d0179e752ee40
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/83f34672472ed640a9e2be7eccc71fbec4d42397174dbc5abd34ffea754681c3779c07692d9eff30db299fa0c922594727f5d316ee2542baa34d081717e89ba8
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/649d1106a8f95de6eaa7835f4eb66fe2
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/4955cb0e2850542a1116fcec9e04e7f9f6633a806013ec49fc9e519e2d511cccd375410225eb8630d74b65fe0f2633ae7f46b194db8e9ec78cf7a2f12f6920a1
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/d6a1fa15be0cde66ac32692561f2249e
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/48845cee32afe6ec3338b6abac77c773980093082d9dfedc1b9f03c31a5d940125e2c01ceaaf85d3bc06bb75e5871e24a51743b91ce1b5e8ec1c7f4eab4a4c2e
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/514a676afb3bf9d5a20c52ef3f1d4bec
-LLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/ee2ba45c86b8a4cb5d2214b9fbc488176e2134a9c19806c160bff7e3bc796991051e04fc5d2829b151faa3dca6738f4468405b246f94bdf27cb5366b1043021f
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/341e3f0b5c160100f5e12783b8f779c0
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/65b2c2091db1364adba4fe1e4ec6b9d6582432a0a0751cd0a3aa1c69798633b3aa5ff09d3de4e47d552d55d5ba81bc86662f1784cff2ed58e800452488cf9d50
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/249910dce0a9ee089711b71626972b26
+LLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/1eba4ecfefb56a00390e5c528c467f921d64e9ca40f5fdb4d7fe0d7ee995f03d253887f7fe40ee285f03b12fa7a194543d18cade6af8a24bf47e56b06a78d901
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/7bc3125dd810bcc44ea2d454b6caa683
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/86742a4476481b14145855ead8a5acc6397782f6d3445f900ac2de0570f1fcf53563cf5e1f3cb59886282083ce63756604f1ca2434e9e427cdc1bd1f68373581
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/4eae06d9e6272aef23afc191501810fd
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/fb75927982b1428b05b765bd5ac017b2c15d89990b7e6cb582b9e1a3ec04d09801d25d5cc6c037a12c205edb7c0f7a2d33832a2d1de7920711e9720dc3ca3655
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/cd86e18a63cd6e84a1493acf0df4e267
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/1dfefc4600368467ab90ccb527a9fdb012b9b7f485d932a0db8c4b1b81985fad931b74494b76ef2162e46280447d39a055b5681b33a17c564c50094de29aeb13
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/c7cf7daa7c11827ae4f9fb2e16f3cce3
+LLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/dabe2940606a671a8e3b4f28bb9e813d000650203c382372142457020f2ccd498534903aa99320afb7ff960a62d752ee6cb724e74745bc1bad1051e12cf78ab4
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/62e575b89fd92d9206abebc19b084abf
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/7ac029567fd68fee82b7096e2fe278ee5cd2935494433b1faace036469c54bc471d614d0bb339750429dd88f3e723165d2dacaa627f73c3647c6f3b51a4a3034
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/5d39ef811bc78204ebfc7e98111469cf
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/10fc9a64d63351e168bc79fa63bcaa6fd49c8483e5ecc40a66216192588367e9b47ec3ea2c047e88f39ea8f1caf8052726f4bc8858223f7744606156b4133970
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/f072fe487e5d1b717aec49a6244adf05
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/42b03a2562728ac86e751abab2e8233d583baf006e69b107d002a9258844ad53f62e6332eab3790364940d478c7ebab6d3e0e2194220e8436f40e6b75063d1a2
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/eabf0239298f13ff4893011e75828bdf
+LLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/34724d9c9a550c85d406021d7265e1848b002b8f212427eebff6e8f03ec6acc336efb0c2cd9d9e1c76329e7c84a84a9d852b8de5897550d957e0e9385129033d
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/1910b5daa31db6542f0c762901ab7d43
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c43e8091e9946ba1d8849734a25b258df95b4759a79676565b624930d4a19805a78b66b1d193e528f95174d909d7895d4a4e49fe8ca298a24dc40d25c95900b1
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/a5198b13dc75ad3454e05aa6cdaca48f
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/9ec8078a1a7246f1545fe074783d6b88ce9b50f62b0438ff5637f6dedf5bcac427cc252c350354b7063f79f4e31a19f699c168c15bc6547a207da497026c2827
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/f569654ecdd8ec2a50986ccac8388c69
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/9b50e3be1577a753f0ce42704846bd126229d8dd9f28bfcbda58c4f18e4b9ca4ec6bb9b57de61b3b9af8157a2983aeffb8af782a073e5e19a8ccc261cbea9601
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/496de8c9e2361f44ac6933480620d07f
+LLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/02a8ecfb6e81e0fe07fb0d616a84a590e23e944588c18348c32265bf6bf19196beec189a0bc40514e379e97a9c8bef83557260839800fabe9f8e39e96689713d
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/05bc7406fd0a703edbc912bb3230eb37
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/898dd4c19dd0f22dcd1bd44264daa8dc64340c890c3368fac7451da1ac872a687d55b5eb50ae4e156c2dc4ece226ec05775daebafe9d8b53eb83b72d2986ff92
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/d6ca30fc3a2796ebda2451f80846883d
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/d7dc96e1bbca38272b1ca78b3ff995fc30434937a58815c63d0a9b4a017964cfb269a1f3203ad8374870257152229941d420f098644375b5f4d1b88fe39e0dff
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/6eb1a197150ad6c165b82c5e0e0db102
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/a159598c2bf351ea79d01e8a454a82bbd9823c080399520af3182e57259957ad07834b03c336e6225857da365e8ec1aa9f65b0ddd0821883ae817cb81f8e6dab
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/116d849cb2fb4b1c8c517397b2b04192
+LLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/7b2596c76d2814fc30992ba78e5c8f93519442fa76004187de9830732b80bfc6c77f5d7aca042c20d8f868cd682bb6f71e3fa32940bc8c7b401753dc4ac2f331
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/27837dc854a173bd37a20f92383f6913
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/1719205cba6de969e8724a99444bf958d5a7943ae90ee2dd11193f56ddfd4f0edf6d9af6da2e67787a64b91d994fee76bd8ffde36486c5229a980c2c4ef07e29
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/f0016c21c045e205131ea22dc711acaf
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/6d192b7e21c7ee3327d288b890f4c5dd03e5f53dcba6905a34cab96b7ad0ab6364f5271af88d95e60aab8f569a8840d17e16f27f6fcdafcaf537d5d4a651dca7
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/9a2bad4518966db29e37e7c88388e779
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/b9a10af9dcbacf1f129d4e9b4cf562a6a4687252cc8a0fcd78f52d75c0c20be0ff32e67413a7902a628b04e7fac1091d35b64b145e33814899796009b6ed2853
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/77c4e24c1e44ce14bc6476954f294a15
+LLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/d9d90a4ac788dbbc1b532623a380d4cca8813ecdf8b7b4a8cfff769499e50a1433bac618234bd0765d8a4f50aafb3fa724d16ac71baf75ae5a2b4396fa2bd017
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/b29e36dcf5a0aa05734f1d6a0afd6944
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/ab46a835f9843c5b3427101bcd0c5d2b8acf79693aa9b8d4282d499f25df4ca248a81fc94ddd96c75d69d3c6b3814b225eed81bec32fbe9199bffdd605f7fec8
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/a411269f925cc968a0438562262e6d97
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/04f275603134b0ea0f23da377e4983765885f2b1954d5c617134af9f103470a5e50dfda18bcddb836852db2382f1c134db40df00b36c8bd00e7a9e6ff1a9e684
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/841921e33407e15eeeaa76354aa2b737
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/e1fb8b75e141cc90916c5c81c31ee91336911983c525f38eab86682ba69679dfbe1f10c9b673323632fc75f38cacc2af47a3d5d5d1031ec9a2a60cebd68d501b
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/7342a1d7b1d2c0fed7f5edf1c331ffa8
+LLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/dae8ca11fa8d34f99ee19a95bcd108a65b9e6a6ddf2e5a9b126f2ba1b1cdff6b7ec21e9590d70b3785593435bb71e47703d9765811db814a90aa8a47940421ff
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/10aac489dfa10a77427a82958f525da2
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/a87f721df4fc5f6e929a54d8e41e55fb366a051a610836923213bfa42a7f1593de880391131619653cc3571bb76a4c82e011852ee5a6005523957c9f0937e6ba
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/7f231fd359f9297261c22f95d8f738c8
+LLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/fdd6441011609ef341108ff2d108c6f320d415b621a69922aeacc555c3d1ae6090a0f600f24e229a609b88ba9c1868900791a6590033b7dad333ad11f8a6365b
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/c4523a485082044553e1a89049dc4734
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/db365e63bbb5189f7f348e2fd51e627ddfebf838ca9dfc6c0f8a7bbf6b8a2a03d78ea3ccdf08b0c2674f4cf5a0979506efa643554091ba751f16051bdf42ca9f
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/bcd10e4f3e5a4b00d52441e0094de1c9
+LLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/b17fae89a3dfaa9428cf48c9c0866477cc75edda6aa3800702227cc9e3d6ebaacbd60cccc96acb4ccde56a2de531dea5a436bac8e6c450a4674daae23b878037
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/2be8cf274b7667adf8d967a27abdede0
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/15f58c9a00aca5bf828708089912f128adfa3b719cc2fa8b9b4cd7ff7722d02375bc9a961b02d5c6a6c9ab637b626d78876741bd824353aab944e1c3b6719837
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/0dce4be3e8cead78cd3d12ca0796d560
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/cd60b39f2ccfca8ae0a497292819e9cc1893f6c3b2162fa9bb3136187351cfb1d6e4855141f1e9252bdee7e97ad61c0560566c2e9f73fe77a26b7f4ffadfdcdd
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/f2548c8f4bf1edb488642245221829b2
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/1604986526156a40ea82f50ddd0465d06df9faf306835f1dbbdac7da7f97c60fe684cd6c64acd8833a9f8b1d16f80c123ceef94fc16f255f815b93f1d41251e4
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/1c268e3e93ab3a34b3c05322c2fb0dc9
+LLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/f111ca82e196ea9507bb089b9d10990de1acb1a94778c40012ba6bfc16cf362369fb1f9dcc869ce14545439df21f432589ec004816a1ba0323c5edecc2b84211
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/b39ce0b0f143c3bef4dade99251003bc
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/62148e1e0a31d6b28effda0a5016d9335005b27ffdc5be1d184efcbb13f13e29eca52eca19cc6800d1d0421c0e67a36027e05d5fdc967dae686b5bfd112fb2b6
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/9475748210eb5b1947fe3aa6673b6c29
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/54320295e59e5903db558b6be0220442dbaf7ea78e1612d54a35cbe014541b354ea708679da00851b962140b6da77301e27b656fd478666d3f0f710382c13a85
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6a533054ccfc3d1b0920eabcfb45ee03
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/3871620aeea2ccaf6e4b17a675c5504624fc6d8ed57bf4e5b66e0372b7124e4f3d1e0f10baa1018d5a1ac5bc4bf0e9d2143e84827712fda1f512fed24829f1b9
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/3fc6d1b7d59b98823d6016f97835b7c5
+LLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/745942235e40f2ab71a5eaef2768842823620d4a4dc7454a7512fb2bd95bc8a74323eec6a4b33edf1ef935151c18a20172f60fcca2fca1ff3a37b1e019ea4640
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/f069af39cbbb650e293093b5989324a8
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/34685eccd8c1cf7b72a52bf353de16bd0cac13959584217ce5d0995b52f506909955a7051ff7b29ab9d9c3f603af8f7db936f11e4bde83f5acf16415de62880b
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/819a9695c365b9365b6cdba7cf9288b2
+LLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/4280768862b19918e11b6a7ed09f150270e71cf4560b18b224b3591c460c9375777e73e41eda375271d719f23b211daf3ed51b3c87bf4ee4429344d14f1ed7a5
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/28ae362155ce224cef605cee53e36d0b
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/d90f25e57f92a9da68245ceb15316e3868bf657d7e744f37cce5ccb4945777ec82fc5d470ba4fc104fe7aaabfff7b0dc260838a45331e4360b0fd14c59a55666
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/d10ec63510dc1a043ee0a4e37b49eacd
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/54c393208d1f51661e631cba62a21c0685fb58827067d5ea7c42fb3d6dd8c8db99d8ee1b3c304abc25510bcb0265d86ca03e1ce19be4faa252d97cfc8a1b52cb
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/2c1e000206c9e7c6c8e7515eb8115e3e
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/12c0ead798e43448a30699b5386b3d88aac49aaef9bae283ea6d089a1c66df7293f4f220a2b5c3d96e73e556e37e745f38d81f5c68e09a86a2b19a6695eff460
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/21d6c5d5e422412b88ffce50862efb29
+LLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/5e8e17ba79134e9752c7fbd28b62e4616574a5e1dfcb0980160a3aad28a2f6cec4e48ed1acf73ca1f94d74397f7ee3eba53cb1280699e40c451295590ede3fe3
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/293fdc43431493f915a3e0a5b3c6d587
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/27e13a4334a3bfb3c91fd06abcc4eca7a347f4bffcbce40834302d153ef29756295121b42ac433c266668af1428ffa08ed12ce75f21fef44cd7ac1d8bdfd155a
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/2825dac8280d0563b7f521a9eb8c0563
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/7f4549ac7b63e58d8c149f6b22bd997545713477a1df3b32adf640f3951580df1645f08756d9ba80c479160cf5759e3f9372396655a35cdca14f4be4afc4ae22
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/0c0da0eccec4a092fc0e9a915716ed6f
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/e538e29c4d52d9aaf151670619702541fed8231ae4c7fb9431a425d10eea95433087034a37da8fe468bd27a1c882f6f8eb9549ef71964124db10e99f4b402ba5
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/6b4fd19277c978306441da3b58ab86a1
+LLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/6216b3e1dc6aea979d8b5abc4cc0faf510e4e64441b1d18b4b36c45d65e874e9046e14eea67efb88f3219449ef048d34fcb751b15c59f8a299aa822b426d50ae
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/b7956d25e0e5ced19df637b4fadaa532
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/ad632493095a8fc3638ff48514c9902215378532c1455cb19d70da9f2ae46fdd91ad4a8b5a3151bedd38dda9f07c21f9a25d8e095ded7ba843f9bbeb005e1bd4
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/392f0f0f61fb672002c7473c64a63ccc
+LLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/d620dcee0b20e3aa4b2fcb7ae835933b33b5e4c4b5d9102b885c70b1dcec535239eb5a3d6b56b51f7b049943a2c79950bcd4a4425610f7a1531f6c452eac03bb
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/0b41650067323bbe0c5edd5c060b517d
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/111a21a5b491a77c69ee724b37d15b0c7baea387bb6a36695a1c2dd5f6e2eedb0ed211513145d8a6ce4dd6329b2de67e9bfce1b03fbf911b906a33a39e573f9a
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/a9079da821bee8e4b5aebf47a46cd9f8
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/7088945264d1ccead492e81636086390fad91b0e071e9f3a54ef903b619ac2a7bd38fa5e0e04ea1e299f3985e04838cd5b7a2dffd666b8e7dbbf3b419f74df88
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/4ccb3d0eabf8253cbdc1192b04c78d4f
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/9d817068dcc2b60c77fa639aa7632cbf071746e7dba62fe524c095f86e88b9323c3ab82ed5af0dc8b1af9c3e6f0da18be53d92e7c05e2d056c84e5a4e974b6d8
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/a88f7a9f42d2cb5567c84d7fa2a2732d
+LLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/9b16cbf75e9971dd4950cd79aef85396a7d8522a572f1c8017af82725cb335674741af680e1dd10c731987a321d3afd5e3e85718d3c3fdd1c9de4803e72a66ac
 LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
 LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913
 LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167
@@ -138,115 +138,115 @@ LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8
 LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0
 LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f
 LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6
-libLLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/1b99f43b611f8c72e187b767adf8abf6
-libLLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/e8a65c950e1d782ab6fca6da5b9ce434a66901d7c1efac0ad973cc376ea1c6d1ce7d68802d623b66d9fdd2801035383a11961cf544173a54e90b6cc4acc2ff88
-libLLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/md5/7a1171e54395fbf5742c0b3dcb1ad116
-libLLVM.v17.0.6+4.aarch64-apple-darwin-llvm_version+17.tar.gz/sha512/f173b4fd6090cfddb2fe43da5603d7eb627bd8cc1385b039a719b830fd82f230d0e1e7e9d8808c94cebb83c46145148c206e4dd43cb5cafb96d7995f116a170e
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/0564e41ee1ea590661214ed7747fba62
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/8be4f721f5d6f264f4934423fa884e43ef6fda5f895786ecdad51bfdd3c02df575a84be87d69a367cdd4131576558096502b77063d766a3961be1f1b4ab8e205
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/d64714dcc951dddf20423aa7831cff9b
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/41bfd5c44c27be4538a19da87bab09f35cb7212eb31205cac876c35721a15b6baba26ae52b5457f9db2e4f51e9600cf9677c68a6adb6385d1fc07fe7edec274b
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/f457fb97ed50a72dfc59a3e84177afa1
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/a04ef467f8761843c6e621e5d996f9cbf06d11d46c6b7bf85cddd768ec7ee1a6374caf153afb8a5c017048266e6b5df6c5d90cdfd99f1ca0f2e096002e2c0d4b
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/4f6544a9c3e3e4929ccb5f53d5be47b8
-libLLVM.v17.0.6+4.aarch64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/cce7cc5eaceff2f02e67258fbade6aee130462d049d1948c37061018f9acccfb4bfbb0cf8f751da83c8f76fc504ecd4ea9afe16382e4d9f261c573a1daaef942
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/137328448b1a47a3ef13cca0665356f7
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/0e6ad105e6cf8ba21a7521ba213e3608679e414740cccc03fbc33385310b50637da9a8c1cb0ac3b54e0ed487e04e3e0557cdcac40d25211731645ee1c56aa238
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/412efb53b9468fa5a64866c76a60d9ab
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/8c02a1d910aacf807f8920a930ea52ef423d70a6ddb18eaab32ec14e836e08b50d2444fde6832f5eff21bc14ca291d69a2968d6bc94e4039dc439b879378bd3f
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/56445a46ec24a4ccf796b42d83a5b43e
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/90549dfe474c8ed9b12b4589243d53508b2b682f71061d583b77a01131fd215341f8dd860609823c5a564c5dd61b317c844b9d050920e19a99daf3d024f937cc
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/48d268feb53cccc7b12700a172793525
-libLLVM.v17.0.6+4.aarch64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/a6fbfc44d5382de86548ecdf7d18a8487ad37c84efe79d67d9360872dac756e7e013972981c97758a7a9eb3857d70d79d3b8bc90aaf1638ddd8c456111507ec0
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/17986258a97407361b4c4c3679932666
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/71fc532446f5b20f2118a56b412690ed38c3bfc52d3e52079dd903b671bd4624e2125f377c183b86aea4ba57592ba9a1143b5a1327ce49b128bb7c81490305ab
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/dc7d7b2e6184a1ecaf433a2589b59940
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/d14b4b583dcfd8709e17d7ac79ebb5cfb044480a85591b5e26b8fce9ecf49a13c4b562ad6e6d2081366db86e6b66caed2278ef167a97650104fb818b5b0e4776
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/d6c4ac2506639735cd7bb4940404806a
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/2089a09a4ba75fdaf053c4a59b3a5cd333c6e043f9e5bab186d5d751aa84d616dede56799468768190b3b8120747e7e08d404b8c39e7329b3154ae134ebbcdd3
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/20dadbec0c7ce386f718031adbc21b9a
-libLLVM.v17.0.6+4.armv6l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/2a6f09158272ba3d2a332646db57a94c2b9481835a974ec87f4c9ff23b5e5dfd6030f71d98a43380fb5bde078d7fb586cd5afc75b4b4428ae80f6dd09dbd26b9
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/9508999ae1be9c15f87ac49eef8bae80
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/735e5cee32590cb1360ed760d49357b2fdc830f943e3c184ba9c213f620ee38b4b8d7dc378540fbefd0f500c61a131b36051834ce521bb6d75f0c6ba7e223606
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/0c56726d3c7b249621c7e5e71bb91660
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/843062aa01605beb40f2d5a484638fa4b896016cca242084ce74d4a5eb1c0b8fc91addd71be105a784e083b90b328d0c9fdfdfabb0d257bb91d5ada5f4f71988
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/3674c019b88c8324c78343cf45beb98d
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/043faeafccd4bfb5207078af3517832ca710add36eaebf8abde86e4801c1e964d3bad5547800ed5fc4722b90c2bdd606a11ca06ab277f1e48264a194b1cf85c1
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/bd94929dcafc7ef8d4ad1f788023afa2
-libLLVM.v17.0.6+4.armv6l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/26b40bad7ac61366e5609c6078d2ec34bc18ef89b25d0c251a6dd49e83df4a62338f49cae2729d245a1d367a9d7bde01a286eefbc71668097d83b4c98402fab6
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/0a443c1b7289030b32e22dc137b4ff3e
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/6ad96be0acdbc90ac092130ff438a1bd0df409683df624e0fce4095bf393ca90c54c71c19b1dc1a28563a25ea06f35d7883f199533d3e52ab42bc373212aed9e
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/md5/5ada2da7581d128ec2dafed8ddd489d4
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx03-llvm_version+17.tar.gz/sha512/6b9d41908dd882e7a874a46786f5bf67db1d63c2917a91119dddbbf01bd709ec5d2487c0f3703e669a7ef404fd1a5a7c8671e4ed2e3fd10a47e6c4f6c2b7f115
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/86074d6b9a30cd8b6ffd8e1e1b3a6d62
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/48c7d92a780841d333adda21984920ca66d47b54b152c4316dac05bbb6b8ea6007644cf93b4a4f8475a4cb5a228dd0d0cc17482d873c7d9c9d90213b64c3ccc8
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/md5/df1fcdc1f7f385fe630bbc83f121943f
-libLLVM.v17.0.6+4.armv7l-linux-gnueabihf-cxx11-llvm_version+17.tar.gz/sha512/8c1a8fd4665c871d50e68249453918f865429cb9d3fece6ee139f367d006d5cc21613681e84656f9cc4bc6e884b3de7c19c808fe9dc2a9c7ca8b1ea9aa594e6c
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/md5/07b634f82a8330440a2d5608cfa90c42
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.asserts.tar.gz/sha512/51cbc0753da4612ffd8f3f50c2536e7cb26d6cc67c3b936f2f971cceb9735b9e587dcbe88dde32367c9b581fae639bbe8076b774b6135288f2b160719dd97735
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/md5/26261c713ecceb1d7a076c784d76bc0f
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx03-llvm_version+17.tar.gz/sha512/64344d21f38858d4a10adadc169c03ff98d523937882c8244f297d5e835ec8186eb8ad20e54c4aa5bed597af35e7b25cb2499607c967bf404054084715d631f7
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/md5/24ed4293f7fab2172ab21b96fb44817a
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.asserts.tar.gz/sha512/d3c612c24e4a3159699ba5e31d51c9068e977780f3ff2af49a1b084af707398e51756921eb0fec7103bf99e80b6beac4cff5c1bb32c72920ec0834be7929b33b
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/md5/6e53ecf916b54f97f0aa6fdabc49e9d4
-libLLVM.v17.0.6+4.armv7l-linux-musleabihf-cxx11-llvm_version+17.tar.gz/sha512/8ceff63d6ef095dc2db7d836871bb07ba8d36bd946938a21dcff9edc8531635df8af1ce0b39fee0fd644929ab14d127badc77697a559fdde2233d2a74ade6282
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/122aef0ec2c01128acd1b830faf9e279
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/b163e80b21a91b75f8c7a82a3cae705bf1dc6b9f4243ff0e5ebed535589ddea835b3071c36794ca8511b411c71b545a9c3eb75f0a530e682996922916e2bbf5e
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/8410e6b787a1a39cbcdfefbc69ffc0a0
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/debda3ddf24fe3228ac3f90f809aa4c3479a4d46a61f5aafb609740d353acea80cc33086e5fc79303845a642c4171c7da79108a213728630e5045daf18e0d6e9
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/7d34ab481b9b538feb96f53c5c0d6305
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/2af919b8481c1531b9a2458dc80fddedf3bbc0eb9d70a4ba7b6b1ac5bbf1163b3c407a026fb95d5c580687ced6bfa7ab474efe91575c9d3d98e3801e1d64af99
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/26e92a8a602bad11d07e5e28dc597363
-libLLVM.v17.0.6+4.i686-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/f6ceb5ff769b7fe75e19ea10fb3ddef55517228526e6fb3d961faa8e776e7b3cf3d62536cf1f287a4d0d9054c9e7b92181b7e3dd8ecc1d0f79bdc585f2008d37
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/14025c82e278f11ce31fd115c6e8c344
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/161f80c5d1289a90cab305bc6dc6a54528e797e6a0be375afba819640507df76636885b9aa5378f2585a7441acad50566004f437ce1e872e50e8c7385fcf4621
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/47ebe4003cf9938930d992d12c52e6a5
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/0d3ca73be07d98bec4f283e84c4286249de7ee8f2b9cae7c1b0f44a96ef9d90fd16e3911c9fd49652e0fcd105cb2588d66994aa502e9b3a7cf22eed6f264c6b5
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/91d359c70756f364192ae99a3078773e
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/a2c36c4039e98b06472a4081621cca4352bf0050d915a3d28e93185f16e85fc38305079a94d13870422feb9e5c365219d9213fc64441d1f9f2dc176711447282
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/56857016759f1e7a7414d41b65756d20
-libLLVM.v17.0.6+4.i686-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/e136529b357eb9cf6659f8b0447bc63bce77e3f0b943e955c01571b69184fb0c326b08effdb9e08342556c3b8649603d94e8c9c265041482e2c103b582f102da
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/ba23feb343b22ea60d9e1ffa0d4093e8
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/84b77313f0048e02b7e4d6185a47417e66ec6f32ba2a8e9029b688a386acd3c19c84b1bf351e2ab6ef7942101f1fd89402bd12bf4676d126cb1b283ce9272d0e
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/5bcdc7a767e191726714edc8ca6a416c
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/fee08e6b046128c83932160a376dec01666c10dcbc22584177c017ccefc7201147c74c12137213c8209db8f0ea04102628c47dbc9a51615db889afb0cd11abdc
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/526db12f2f2238cbf935f7a2bb7c2485
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/38602341b0e5f1ab99af66cbee19c0587beeb7883c71ac9b3a7c5853a09fe1b4aef9afc6ec66fc53442e491c976f02dd5dbc739ee9974689af5f76396f2ad382
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/6837b6aa15a9628614b046c18985dba0
-libLLVM.v17.0.6+4.powerpc64le-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/4fccea20fc1bf221b27193a85fb3b274c4479c6f9b5c8e77fd9666f053b051785e7b4bf512466a0e6df5c303316825523c634b3c81e7829824b3e6fa28b4f032
-libLLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/md5/0ba2cb738f9e3f1cbcd0774331ffb7fb
-libLLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.asserts.tar.gz/sha512/5a6de296017d942e7ec108663fe238f7bcf2a0db54d9cc3c44f4b2fd2596f2d4641d5ee1ea183d0b6cfd3bf10a4d1196c21a503f89f8c1c3746023e5558c6049
-libLLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/md5/53cbd7116db0d09ef0e41802032b8d3f
-libLLVM.v17.0.6+4.x86_64-apple-darwin-llvm_version+17.tar.gz/sha512/30cd95f1437fd05a73965e88d35e3c91d4281ba9a339d04a36d8450576e8f32eb1b7325b45b8c979ca63b319af582c49f867a7507248dd1f3567226c9fe29c6e
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/md5/d9d5f588ff478450645c99a6fcbc59df
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.asserts.tar.gz/sha512/b0320e945024bd3519edd48dbfac8735a6f6041c199bd78db345f27ada53bc12917184350a9448b12d4f2ebd594e0e1aacc12c7796807abfe47997f38db21c9e
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/md5/8e79c58d7ee18853107954d97d19afac
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx03-llvm_version+17.tar.gz/sha512/eafed326babfd8ab56dc75c999b7526729723d20a343c98125a532ad35bc3ef1cecacc8366679190dfb96b7be6617adba614011e87d925032c5dfe96172b9228
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/md5/da04c26d8cfd0dc3487bdb28c5641673
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.asserts.tar.gz/sha512/5b61115f20383e0c0c7274e24091d6e8ac29961aba5ba5a81c4f8d1226b969674d72642627125fac696b6dfbf64cbad7aab1f090bca217b8df4f50148c20442c
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/md5/0283786c2b0901e9e5893179c29c6cb3
-libLLVM.v17.0.6+4.x86_64-linux-gnu-cxx11-llvm_version+17.tar.gz/sha512/f47ebfc5acd36940ea64d5fe5d3bd69aee430c911c58b453a2355b55690b488adc032437bd10f893afce1da5f8777ca5446426dd506b8b5fc9fb6f76fbf9f6f9
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/md5/aa8af3906a48929dfd6c04a83d909eac
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.asserts.tar.gz/sha512/ef9954203c7f3ed81e044c44ca80374485b91551267a5b74bc42c4fddf82ebdd7f4136dcd22b05d70bb66ae47d4ed49079f5e83f38f0a7b9141158d631f96c9e
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/md5/26cd3622c65ceff6a74191395bcec31b
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx03-llvm_version+17.tar.gz/sha512/033cb0f0ddc9027afb5dace0ecb39b3be9f13badda715fea1f8f04ab969f0a7b25544849abe851f4aac2576f4d99c9be8595296e8d1b7cc4accfd4cc3c882b3a
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/md5/a2fc72f59c1cdd2042b855c833e23c1b
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.asserts.tar.gz/sha512/0fc74b666b91d667fc112f38b970bca4cedc3083fa832907d9daddbf7cf99fee89ea42829eda609bd96a1bc9d80adaf32b047232a71c5957b87fef60cdd4c046
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/md5/91d0f5839e7e744eb8048793c3236a89
-libLLVM.v17.0.6+4.x86_64-linux-musl-cxx11-llvm_version+17.tar.gz/sha512/da9ef48726b6d4e2012831bc246e3e6d2401af7ddc7636add6c96239351a36c3c5ae2fa71937b047ba0f63eb0377692ae85357c2be0a73ab6e5e710193266bed
-libLLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/md5/6942c1fc5ba689e7058100a6b0fce16f
-libLLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.asserts.tar.gz/sha512/8919f5b643aaa6c6c761d92b4e3d6d28165e18edd75baf2ed1dc32b27c1b2da55f71b6dd5ba7d114d021993eb4db415e8ae264ff014a12dcfad78543c510dea3
-libLLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/md5/1a91e2eb0b4d4f6d691d552e78060661
-libLLVM.v17.0.6+4.x86_64-unknown-freebsd-llvm_version+17.tar.gz/sha512/f1a1e2c1ef51bfe489660e2a1b1c997f550cddb8bf09634cbdfc6c17bb0a1d6096ad94fe92e02cc5bf61e6b4bbf4d3a91704e9c15e672f5f3ab4a9766257d395
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/md5/c9d70905fe2dfde034855eab75d10339
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.asserts.tar.gz/sha512/58e653a04f75078767de6d4a65086eca681f27d4c579fee518ae974d47252699bc217a150c5e688f69d7444670a3812ad0edebab2886a5c4ce501d2570e38cda
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/md5/c7342d2bc6f0004c78b8e768da06d332
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx03-llvm_version+17.tar.gz/sha512/c1736f186925c7c21a97570b0346ca235d20651f7329ecd4142737473ce67b98a36434c02b384e3b698990238b6118f164601af5d91895bbfcab69397bc6b85f
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/md5/a90df960291596424b7291a68f632404
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.asserts.tar.gz/sha512/ee002ab1b08078d02b5f1624ad2b6590d1937ee4c7d0c78b03b6dab8c7906d46f5373b8d5fbb76460ed8782ed85c84b12ac4b139a84bc73d2c17064930a668c6
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/md5/e1928993bffba68c277f7bf6e1f537ad
-libLLVM.v17.0.6+4.x86_64-w64-mingw32-cxx11-llvm_version+17.tar.gz/sha512/c18ee72b3187b1bb5184e5047164338d2a52eec626db44a09a77c23889db8a39e417020f3c50feea0f180aef39f2f23fff4d3324fa7030e4feef6a4033fc4c70
-llvm-julia-17.0.6-4.tar.gz/md5/3c69462bf7ba6219955dbc9e7e0c52ab
-llvm-julia-17.0.6-4.tar.gz/sha512/aa96b3d01d3c2c86b79712a13f1abaee8dc95b63c8c7733588c2d5709bb72e2e835909af5a907c77b5d99d69ec69f97cf567d706d11d5f54d4c6b8536fc7762f
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/2ea6046caf5a3d519ab1c3309a2eea31
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/079720b30c61ded8499eefdb314477d58bd121e9f326d98696ee39b2ed91f806d5f67e68b6fbef8613a992175fe34694e5efe83e87ef3bfbed67d6b7fc41ebf9
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/md5/62c49bc7767d1ff114dc6b6a996449ae
+libLLVM.v18.1.7+2.aarch64-apple-darwin-llvm_version+18.tar.gz/sha512/c708472b325cd73b94e10003bf3267b0ecbf3627072302fb22e78336974f2c7855c8597420efc954bca30aee17cec55277aa0c95a01cfff38d5d77df50c807f7
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/766a2de98d275877bb676ff1f23e972f
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/3b353ea038fafefc13ccb4a81c7242d569c206362605be374fb312cb495f385796d052c3a7e08c7fe6ecaa3018e2a7e3dfa43d71a8c3a94987f7dc7aa378fd22
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/0684a6b210b799a8a0f45a286f3dfcc5
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/4221e2d74117bd7e89aba2945030c1507e51999b236814fd23036565364c328392e87032daf1b9fe274ed89fcf9a6dcd203f0f1c8602c2a08d3fcfa189a5fefe
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/6b460256e923637e5107d67859eb60ba
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/7d3f2736afe4022842529b1355cf9914b7a1c7b1e261f814a4523ad30a0cf0189056d5117a06720bbb7a844a435bb632ddbda2daadbf7e01c0120452cd13e6a3
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/c2b13a6a296adbb4be91dd3bb5be0877
+libLLVM.v18.1.7+2.aarch64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/9086937e718125afd535b0066ee08a3523161a94fa7ef3c9a3e86bfe760f251b6ea7b035888e61a0e7f192ed25c9bd0f4dc153df86e08569e7067a7a30ba48c5
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/758d33fe0b2b3d0371708614365450e8
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/79a662f72ba1b89b373d1d143ee880a12cb128211e79182e7befe8b3e50298b594de2ce489ca8bcdeadb17fceee811622f8bfcbc3e232cefdaf9927177469eec
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/2dcbb811be8985bfed3c8b37733c0d40
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/17f6fbd96ed5029f360c101cedad127881e14b42498d66f717448d99ca1909057ae79169d934e08157edcc7467db4b3941bdda26a2e9f42645963eec51f27e29
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/bd3b904b5f9464aaaf87c41b899c8ca5
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/fa99e8025419a18f548f658ea589771c2803480c3cb3a25cfb75e26ed0993b7b37bba204d7cba1475319a71159813b2b58a3b3327ba24d264cf80ef24263628d
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/b4f9038d5c3c13207111ee1a9a918cba
+libLLVM.v18.1.7+2.aarch64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/e8b97bee30f597cc06d31175e12f0c2035aef0054e8abdb431f31b1e9d440d561bd9bc6637a403441aa7f3e1d2a46c600734e17e3b7ed0ae899c92df91758780
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/06d8e634b4a6914efc18b7962df52021
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/cf6aeed1eaf652e5830e34dd2ba88abc33668953281146106bbfdbc92f5f225645f00ff5b4a0eb902baf904362ab4eb32192fa50ee5b2672e8b031fe2550f9a8
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/53e83804b63e6ae4d0f1c97abcbbd1c8
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/45b3ee9b105ef2ef106fa8ac7b8e902cd1d6bf3c9bfb57edeca9e14f1654714d23fb086b369a9fd3cbb828c04fee4cfe80d2b2a2bfaa852d3ac65c0d213d8c62
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/91b6cf00564053d385e30b34e5b8778e
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/9111f3f02b49bf78340c9b0c5c1325a1ca09b62c83aefece1121573dcc21dce095060351f18997971e5cfbaab346cb12c75cdc0fbe8fa92aca2e8a68b5f5f577
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/f6c91b71dfd73c7301a4e3de48e072de
+libLLVM.v18.1.7+2.armv6l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/581d7e1e4d85aeaf082fa31555074471705e391de0771bf66665807afb5192c79c481ca30e73a25f4e2d48d4d325f0198e39bcbfaed2c9bc7477ee917667f5ce
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/ce41ee46959e5e3a17b6c99293afedb7
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/73d8c5af750ea9deef822aec58d8697243ca154bc4435ac0b0ab8c90fc97750e91fa55f8de7b8283eb1ab19951cda3e3c4c60834bcf13730163e593126a8eb57
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/67ed5b654852dad400aef17fb542703f
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/07f70c57e27eea37f520f6f0a954b54d2506530d5eb5a74e5a8526ba8ef55a948073c49037544b602d03d0aa482704292eac943f0a83421386ccbfbf22ee8510
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5b8bd88d49ce21e5b63af6f77782eed4
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/cef1c561ae388b2baa08e39dc195989cb795d8a2747f5f11e0dc9d9e107b9e99dbba465335376beff2e1b326512f6afc962775e0b246f3edcfadf509235cabd8
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/5fbf26d20b2ce3f61edc9a9ca2eb5284
+libLLVM.v18.1.7+2.armv6l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/2c564c95d648458b9a0f0c963246cf5564c625107682f680390b6db5fde0e2b15a964fd3fd23734b5b2bb135db1fc698812d61b3f275710593f4defaee4a9c23
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/c81bc29a75acf4f806f3eb13bf890604
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/c8c922a0a4fefd549f1c2ba396a3cab9cf7738aa82e7ccf7ca29c090260e2d73ec45d6f2b07173d584f6074b10fa04052114deef6ecb6f53ea87f1924074137a
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/md5/1fcb40ba1a427105b4e7d13a6c11dc78
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx03-llvm_version+18.tar.gz/sha512/392c9ee85ba7ab6697bb8979c7f443d1d25f7ac9178e96a886401cfc68d75a43ce98bf3038a7ba70a9a990f65e604d38e043472cec3badb25fbd1b38cfbb7162
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/427a19eaf69725d11bb33f48de9cb205
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/542e209b10c13d8dca867247a7414f84adb832f40051fcbdf0dcb09bc9664a77248e1b0ea1687805847dd9f5a05b86475dd76aba427c9a1bc83f8502444c60bd
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/md5/ab34bfa2950014936edd13a7b5db8170
+libLLVM.v18.1.7+2.armv7l-linux-gnueabihf-cxx11-llvm_version+18.tar.gz/sha512/6376b25d0278e5c97581480fb4d54371b09a08be88f4cc39d2c7b3875f1189cef60c1be6bea5e12b0cf306cef8b394bc7d00f8b0fd95d749bd1b4eb318af7e15
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/md5/cb6300fe87fd7cb9840f3bc44af26878
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.asserts.tar.gz/sha512/a7984cd90fef55559142fc05d91b0da1f37f77f25214e93ff7641b7c3958f08dc7c082611915dbfda4bbbaa392656ac8604d4f75369777dacfb78baee2f99b16
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/md5/b8a4e8ef43340e9cbdf5e4479c6a5a56
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx03-llvm_version+18.tar.gz/sha512/fc249f2b666c8a8129e05ea08c773cbeb7af6d37791f271461eedd99adcfc5082e8609ed096d8a46edd1e73505352712a41e0ddc247a371f78227aab01fbe0f3
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/md5/5864689df3298be4b1b4df1ae0412d3a
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.asserts.tar.gz/sha512/8f32f73e366c3a6993fa8d6b8cd1a9391611b0644cd4a77a4f7a235c037fdb75308d99b5a23ada6e4a73ed5fbd8f929a981d6bf317d79d52396220c221619303
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/md5/6bf798476c4e94716cc47a95580104ad
+libLLVM.v18.1.7+2.armv7l-linux-musleabihf-cxx11-llvm_version+18.tar.gz/sha512/9dbd27a000dd3c3dda9047d366a667c4b179cc61582525adb0f8227e8055413ce46efcbc1530305400239656e2f1016fb8833fb7f4734714078e035d388f3531
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/66e2889f86ae6bc1977419e6d9be729e
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/d0cac798c4979b4d818d36596b173e523cba3f41ff7ab1e2111f6a75c3e819e563e207a547328f005c5a93c7f8f88c17bf43c1139b5c2690df4f1d719f82920a
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/0534b72d6d33c8573f79dce8a2a5a6e6
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/6beaf1b45eec8b46fbf92f692f53e6df40bf48e50589aeb5ef99240a5a3ec9089ffb350dda6df24530937d613bf6d2cc4da76e92921ea00def9d2d38ac5bbeba
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/2cf9a1ca20472179ce4a9eb3a949457b
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/cebae06ccee12a14d20d3056ce0519b1e774e3c9d9200a783262fcc40aee6d7aabfb08714bf53b88e03d8b09a96d3cda248a70c16188f8c707b291642998262a
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/4712f6a46e0ff407ece958a7701511b9
+libLLVM.v18.1.7+2.i686-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/9a0a2dfa2076b93027f766277a6890cf94d67c131697f74945e92cf13ae64e84c09d3dd744498986fb22ad5e5465300aa9c8ae6632fcf919a0932515edfcc1e6
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/b944ae477232ef10d213b4c7743280fb
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/25ff757620baaf6fbacb375b103dc0dd9af6a23c3d3bca567c182a6357a367ca125d7b6c66927d7db23816865b6ec783157352fba08532336de467be80efcb9c
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/52345a44b3ac74b3cdf93852bbc63710
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/3e5b449b0f1bab302c45f9ee9f04d2cfbb01ce24e86096aa610fdf360ad65828f1b73734beb28b3d3c249ba8ef657d2663c5492940504f47c973038733b15248
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/36e058b96771b4cf77e29b800227fa03
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/98873cb2963c4469b0f69ad1d9d9e27056aabfb46a2642dfa3507b7fe2f0b0fc41c3991a2543125291783699e39fcbcac0bd6e92fa8f0df97609a85c340fd25b
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/3b3823fbafabea289a769958f633dcdb
+libLLVM.v18.1.7+2.i686-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/91a9c1ad6f37cb1186ba3392935fb55d49e0f8d6afc768cf881886f9b1d8b0a2b0ecf0c81a8e32e36d32cac04c065ac852bdb95ba5ff6780c00a763583a02973
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/bbf060d61b294b86f7e3dde381b00b8a
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/632372d41f6e400a10fae27c6cd06a5a344cfb5902cad7928cb4133f14f36f0a3373e69e73ce9baf52f518340593c3a5a16173ef59a1878e6300e9975aeaa157
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/3d730b713e01cdb5a7a5a46028afd41b
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/052ab4fa7ac3b2c430601753ab078cdc9fd6db7f65ee0b76bb05473f4c5b99ec8919ad9d347425f1928cf619548e992c86ba97f9994218f50bca617e43d2f0d9
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/bf9dcb92ba8c031ae62ed4434fd5447f
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/e53be14dd02a2cef8eccafb9301d29c51d652c635703529c1444947002993f6639083eb8bef13af21c9796717ce4b3129dcdcbe2751a1173d39e321db8f6e3c7
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/b5cab0fc7c6643c6dd161f1e553ef1a0
+libLLVM.v18.1.7+2.powerpc64le-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/4032634449e2669479761c4323096b152f8df4948e3a97eea10f0b400fbf2a00d1edda59b74a714b62c4e204b113d8ecda78d828c3344ebe8bd750d14b3c4c7d
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/md5/054e06d882173ede2886c510e8519c80
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.asserts.tar.gz/sha512/eb97ec25354badcac1b8a4a09fd9e04cfbb7d35493c54cff82af9ffa4c2dc5070c9232a86e900d6eb9acb03f1c572fcde8d2a865477bf6c9fbfc139763a9dd1c
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/md5/f1c23200365b659f0dc07cc6d0a32c60
+libLLVM.v18.1.7+2.x86_64-apple-darwin-llvm_version+18.tar.gz/sha512/fad13fef7e7584b3f756fce9125950e788e79608cf5d0c023cb8f8a4e79001afefa8060f7866875e4861a268b3020e50305e66bf472360c1d92fce12d7a81ba9
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/md5/69564913bae176a167d24d3291ef7af7
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.asserts.tar.gz/sha512/b8eeb86b66d767218e59671bdd597623238eea72319913c2ac5e116faec3f4c13739a24f3b95338ed857ec29e714dc0308e4ddbfe359332b3c27ad5235052342
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/md5/bc9d5637fe30f21d2231a98371e798e4
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx03-llvm_version+18.tar.gz/sha512/4efbc2823322abe80d0134d35926767bd9cab717cde9308726a6a8891e5a707476138888c695ed399e3dddb57baf17abbc43a0a338cea2e5c0f472ab427c12e3
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/md5/8492ff91e6dbd1a66edd8aaf0390a582
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.asserts.tar.gz/sha512/6443bd2fa9c5beecc2b002c26595f2cf3a8e2ea5eb49aa4c00f7252a6623fe0f8c01824941ebe5475460641285c4e56a5203056c1b93a78250b7e48fb5ac9e00
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/md5/6918c9978fd8b5887c66eee76950478d
+libLLVM.v18.1.7+2.x86_64-linux-gnu-cxx11-llvm_version+18.tar.gz/sha512/d455a4f433bf3ea1b5100b9d45199bc785e4b6fbc7659bf06cbde6ada471134e7d4243d3a3a1f71d579126ef8371d70e59f174e124b3ff8d4842e9ee83e2dea4
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/md5/075f87d106dd95c8e9c6e7e157b5e9db
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.asserts.tar.gz/sha512/8132379d8f44a21082c7a90f58a7dffb0c6ee725efd58a959d4023787411b080d72913bb1e89a35072f97aaf1ca512ab1d027b37eaed819e3c053d7a0cf64269
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/md5/4cfc2838a77f05883f82e50b3723dcfe
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx03-llvm_version+18.tar.gz/sha512/20079c81cd6a4020b087485be1ab4928b3bd3e1a53728cc98137a35b969484278093bc75a9e51ddfd8331556577c5fb3109d74dc2eccffa93b5390e0fabff2b1
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/md5/5b8cbf00631bd4540b7335a86302a1fe
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.asserts.tar.gz/sha512/51ba9a4b74b740905cee4baf7f4e5f3620ed81e0746f49cd352d874ebedab95277c5031123f880c9239b7dbf505b10f6531f79c8a6b0482a652b8324f4137cf5
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/md5/11010cc2d58b1a8c6a6e7bc24df0c0db
+libLLVM.v18.1.7+2.x86_64-linux-musl-cxx11-llvm_version+18.tar.gz/sha512/a6bdd9a2a2fa9a572e74ced69c3ce9d1b84cde18155ec9bc7dfbaba411ee6c43d229e6fb333eff66fb63b632b485b46b7cb1657c0c49d9d9bb849fa13f0bbc7b
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/md5/8afe26d16d9fdb0fe6c0248c51b4f053
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.asserts.tar.gz/sha512/32a92685f417c1887aef3cd8a9cadccc4de3e560ba8fc42e8db721f273a3451927b24dc4a2c2e83446e32a84d47f714fc3c22ce71989f2e97c5ca23a1783b8d6
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/md5/59d8d911907127ff56f5eafcd8663300
+libLLVM.v18.1.7+2.x86_64-unknown-freebsd-llvm_version+18.tar.gz/sha512/9b0bf6f9d8d32ccbec349c249b79fd0fa3b4949c04b69c9d408f19dfa3b4f00e5cfa51b798234721f72f2793161d6af6491856e10e6a507976b0da6ed7a8065b
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/md5/b0d9a7eca92d40ecbfa47461d52659e2
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.asserts.tar.gz/sha512/dc4a91e164d88ff51b4a642b556d5767156f28d1efafa533f5d7c619e05535e2000afb2ea47469a90f5a19f970e8f0522f35d59ec250e2f9b42ce22fadb9ffd3
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/md5/92a60309ad33391415c6703edbbd5423
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx03-llvm_version+18.tar.gz/sha512/2fe90ac804d94bcf0d4058a8b8f0c274e405ffee7df0175f5e7ccd5014b29a813af48152870e1af0a79df8d3eec3118c233bc4f5b3f8439fd9792931140ee944
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/md5/0964df17cb98d2d869a33468477f9901
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.asserts.tar.gz/sha512/2c062acd62175d32dda773e9116608ced814a64ab06ea73f89958437178e2603b268638e88162fb81c22e5947cf4cc925b1af10c6f9320be22c92b279b278992
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/md5/7dfb8e61e972c66f1d754cb979bc0309
+libLLVM.v18.1.7+2.x86_64-w64-mingw32-cxx11-llvm_version+18.tar.gz/sha512/d462b6fe7aea75f6fee6c5c2f24576569b5deac8027fb88240e16c55a54d68b7dcb06b3ec4ab514616fb88549fc2f10fb1d587a641d6f29fa66273904bb9cfd8
+llvm-julia-18.1.7-2.tar.gz/md5/5c0ae4abc4ce31a86d5d6d4ecabc2683
+llvm-julia-18.1.7-2.tar.gz/sha512/b4d1dde929a8670eec1a9b25abe23fbc926a922e61b60ed99b52b440cd07cb026e7f746878292db4cd0cb422d9b87ecc4ee4b2b141f8e9411855d18da51facb9
 llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
 llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind
index e69de29bb2d1d..a90d28717dd85 100644
--- a/deps/checksums/llvmunwind
+++ b/deps/checksums/llvmunwind
@@ -0,0 +1,32 @@
+LLVMLibUnwind.v14.0.6+0.aarch64-apple-darwin.tar.gz/md5/d8584e0e3dc26ea7404d3719cea9e233
+LLVMLibUnwind.v14.0.6+0.aarch64-apple-darwin.tar.gz/sha512/7a0396eaace91b9b4d013c209605d468a7ff9b99ede9fdd57602539a6fa6f3ea84a440f32840056a1234df3ef1896739ea0820fee72b4f208096c553fc54adb9
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-gnu.tar.gz/md5/d6edea561b61173d05aa79936e49f6b7
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-gnu.tar.gz/sha512/9fbe29ec6a33c719bc9a4dd19911ceded9622269c042192d339a6cf45aa8209ad64c424167c094ca01293438af5930f091acba0538b3fe640a746297f5cc8cb3
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-musl.tar.gz/md5/3ec68c87e4bddd024ee0ca6adc2b3b96
+LLVMLibUnwind.v14.0.6+0.aarch64-linux-musl.tar.gz/sha512/be3cd9d5510c2693dee1494c36c479d32311ff83f5b2d31c08508a3dd370788961ce46e9025afe148a0febd05942fd294370a357dd717bee353d8a108617f6de
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-gnueabihf.tar.gz/md5/8ca5a926d69124225d485d679232a54f
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-gnueabihf.tar.gz/sha512/353f540b342bc54877e7a41fe65c9eeac525fd91bf4cddbe1b3ec2ed93c3751beaf8316a4d31530502b067100b160301262e10cbe4407db3abf1ceb5d9a74eb2
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-musleabihf.tar.gz/md5/4e5b576958f2a2e708eb5918ceef0de0
+LLVMLibUnwind.v14.0.6+0.armv6l-linux-musleabihf.tar.gz/sha512/2e98c472d3ee25c2e062efa4eb21ac9cfc49b26ea9d99ad4a8e7660c4c09f121d31193bd161f54ea332ce94785d601897311e9e6668adb1e25e2b666e0d5bb3f
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-gnueabihf.tar.gz/md5/1c81a886e799663ce8d04400c5b516a9
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-gnueabihf.tar.gz/sha512/236b78b9a17eaae74ab07349ac8dde16c3abbd48e0d075abd1c195d60efff48e2fbf799554df114ea3d3dba937e0369430a2788bde2a1201126e026ef6cdac42
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-musleabihf.tar.gz/md5/0371f43ebcb571d0a635739252b88986
+LLVMLibUnwind.v14.0.6+0.armv7l-linux-musleabihf.tar.gz/sha512/605318ae3737e26ff89d6291311a7db3bc3ec7c8d1f2e72ae40fd3d9df0754ee2ebfb77687122605f26d76d62effb85157bc39982814920d5af46c124e71a5ff
+LLVMLibUnwind.v14.0.6+0.i686-linux-gnu.tar.gz/md5/cd3f1cdf404b6102754ced4bd3a890f6
+LLVMLibUnwind.v14.0.6+0.i686-linux-gnu.tar.gz/sha512/65fe2c5b1e04da1e1d8111a0b0083fa0fa9447eaea7af7a018c09fe6d5506566c491bbad296a7be8c488ca3495016ae16a6879d69f057f8866d94910147dee03
+LLVMLibUnwind.v14.0.6+0.i686-linux-musl.tar.gz/md5/abac9b416d2ba5abcf5ce849f43ffa96
+LLVMLibUnwind.v14.0.6+0.i686-linux-musl.tar.gz/sha512/fed677ed6f103c56eb9dd4578fa37a56ed2a4bc803aa1997c5af19762a623d2f82db1f72f429448d66fcef3b37af2104e6cb782f023aaabef086a921a862b042
+LLVMLibUnwind.v14.0.6+0.i686-w64-mingw32.tar.gz/md5/4c71ffd7c8cabb1c0ed6290b193883c5
+LLVMLibUnwind.v14.0.6+0.i686-w64-mingw32.tar.gz/sha512/6b1421a3268170467225112167cdb33fec962181993a2dad5594d4ee0623ac88ee0588cdc7d0656dc1cb9129ef96f621a97a224731cd161134d7d63c8fd32c16
+LLVMLibUnwind.v14.0.6+0.powerpc64le-linux-gnu.tar.gz/md5/06faf505f0dc354afcd01113cfc57af2
+LLVMLibUnwind.v14.0.6+0.powerpc64le-linux-gnu.tar.gz/sha512/1f9dfbd403e2ce121e126c217baede178cb1323012bb5e3cd1f778ff51e4216aed9dd69036e2baffbd60a6f5ae438ddaba6c13809459e94bb00be3f7bfc8c30e
+LLVMLibUnwind.v14.0.6+0.x86_64-apple-darwin.tar.gz/md5/516a11d99306e3f214968a7951b07a06
+LLVMLibUnwind.v14.0.6+0.x86_64-apple-darwin.tar.gz/sha512/885738599bbd96f20083f9b9368ce3f243bd5868d3ac9a45189de6cb40b6664a6dcdaece159989e504670231db8c2addfa8d544003eb0cdabba960e4ab6a4470
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-gnu.tar.gz/md5/d851b90ea3f9664774316169fc494e21
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-gnu.tar.gz/sha512/a1f529454f0881baaa508481ba97ecffb040fa92141b4cbc72278adcf8b84f0766fa918aea7fb99ce690c4fd80c36fec365987625db42f4e7bb36ad24ce177d0
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-musl.tar.gz/md5/dc4e86eb2effe1f6cb0d0ceda635f226
+LLVMLibUnwind.v14.0.6+0.x86_64-linux-musl.tar.gz/sha512/c52de384853890f9df81aa9e422c1ba3fde12b2ae9c7b60b9ecdc6d0c88eab495dd336af2b6cd2c31d6eddcd0a213954eadbc7884bc39ce2039cec672eac32fe
+LLVMLibUnwind.v14.0.6+0.x86_64-unknown-freebsd.tar.gz/md5/8477e3624c73a820d8ab82a53e1e10fa
+LLVMLibUnwind.v14.0.6+0.x86_64-unknown-freebsd.tar.gz/sha512/32ce031245a5b59a779cd77fa3c9bf05ee59e48c913b75d4964bea49f37da232c59a42ad993f7b5edc88322148c1d7394984349682bfce3b69d33a51756ac8e3
+LLVMLibUnwind.v14.0.6+0.x86_64-w64-mingw32.tar.gz/md5/7be93eccbdb0aff427c43af651073d66
+LLVMLibUnwind.v14.0.6+0.x86_64-w64-mingw32.tar.gz/sha512/89a61a81ec664c72107ac09e717200b00434350bf77064267180bc0c101a59e0ee8c8af4dd6fe75eacdeb14e82743c138b2fc558ca08550d8796b8db93f89da4
diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5
new file mode 100644
index 0000000000000..a834d041324c4
--- /dev/null
+++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/md5
@@ -0,0 +1 @@
+c866a3ff71f0640c47cda5d31f76c8e0
diff --git a/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512 b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512
new file mode 100644
index 0000000000000..31eafabe3a66b
--- /dev/null
+++ b/deps/checksums/nvtx-733fb419540bc1d152bc682d2ca066c7bb79da29.tar.gz/sha512
@@ -0,0 +1 @@
+a2db523b0068cb727db7e3a2210578f6d1de46493c5e3a9114ad961ed1553d10b646d11486fe4f987e43a9e2ea289d1923a63861f5fc56cada94bcf0b96b7dc8
diff --git a/deps/checksums/openblas b/deps/checksums/openblas
index ad6b38dc075fa..08bd98646c24b 100644
--- a/deps/checksums/openblas
+++ b/deps/checksums/openblas
@@ -1,94 +1,94 @@
-OpenBLAS.v0.3.27+1.aarch64-apple-darwin-libgfortran5.tar.gz/md5/7bb5c7a169ec7660ec38fe73c74a89d7
-OpenBLAS.v0.3.27+1.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/97266fa0d786bac50f37d82e66da645dfa1b811975045d4aaad1f49361caf7945c06203cb728bf92e9071ec805dff2c75f2b45b346ae4f9cfe289d8f2215e68b
-OpenBLAS.v0.3.27+1.aarch64-linux-gnu-libgfortran3.tar.gz/md5/ea42c557a49aa58172ea0e0f0f93c628
-OpenBLAS.v0.3.27+1.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/baade18c9d8d91f3fb32e44609277a7a6cd827a6c9554e5b21f88d492a0c34e93d29041f691f6b0cd03ab609d5470b1a06e95121781e9622cce301812d6613de
-OpenBLAS.v0.3.27+1.aarch64-linux-gnu-libgfortran4.tar.gz/md5/85a9cbbbf9fff65927a9ff96f17d0792
-OpenBLAS.v0.3.27+1.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/7a0024c509a50c87c9318d209465e0d57fc2e0a8401740666f09d236678eb9d5a1b2fbbfd12c0c409006607a408f03f11c1465841417533010a7843c4af654c1
-OpenBLAS.v0.3.27+1.aarch64-linux-gnu-libgfortran5.tar.gz/md5/4e3c6a68a61b9749ebb55b20728bf0f1
-OpenBLAS.v0.3.27+1.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/dbf9fc5465f60a35849c391069c0a9d6d6fc8685b734d00088e297cf7a6c92fbed67f4264f2b2c164d3c6694d9c8f64b750faa248aa1fd44867d18a94211dc87
-OpenBLAS.v0.3.27+1.aarch64-linux-musl-libgfortran3.tar.gz/md5/c25b607a4df84f9aeb112f24520cabb3
-OpenBLAS.v0.3.27+1.aarch64-linux-musl-libgfortran3.tar.gz/sha512/a99fa75a3cfea19c84a4d455585e53f124e956dd5d4ee7ce0c38c0922b0bebb8b2c996079c3bc63e95444b531ddf9d1f003a22d7f6b55cf99db2334bb1c618ae
-OpenBLAS.v0.3.27+1.aarch64-linux-musl-libgfortran4.tar.gz/md5/3473d20c26f6ad50f3a0b635415858a5
-OpenBLAS.v0.3.27+1.aarch64-linux-musl-libgfortran4.tar.gz/sha512/6e9100e0fcbe1b91c5a4461118739af9d4eca7edd7b8e6ee07a2052c0aaad0ea84c048f0e507ff88da81f47b10c102faf9fe735d13ae1cd35f44396d9a51a864
-OpenBLAS.v0.3.27+1.aarch64-linux-musl-libgfortran5.tar.gz/md5/9ad49254a2827987e622a58a1b8c7b98
-OpenBLAS.v0.3.27+1.aarch64-linux-musl-libgfortran5.tar.gz/sha512/f8a3b9aa52920ce76f5d9550407aeefed5e2596d05b9f8f0643e1da221cf533a09de7a0454a04a2d59a3a2a2fb899a538a5e03b133746415a81415ff926826ba
-OpenBLAS.v0.3.27+1.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/c130634237846672f3a672f1d0e346d9
-OpenBLAS.v0.3.27+1.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/c174d00870ce3944c553122606cba7b78312342a02dc4833a91ae105f05d85d06e665e86a79452bdb7d2b31c18936582d79427ec3976048cf09497011d8c77c8
-OpenBLAS.v0.3.27+1.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/07c58a9399552e3b8362d9c1dd155693
-OpenBLAS.v0.3.27+1.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/98570a4dae80f9b4366c08994911efc87bf6967e63e20b486240a3b2d7637fabbfcca3fe8340ae4d9bae7702be400f5976fc5aa0020f984157b097b02e08d23c
-OpenBLAS.v0.3.27+1.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/25a9af724bb8a5ca42be6277a726583e
-OpenBLAS.v0.3.27+1.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/7afbc7453d1f22171523476e90882f67611374b03a3481bdb588722bc4816d081304b811a0dd452288ca972bea95bd2d2286644bda309dbe25fe721321298e85
-OpenBLAS.v0.3.27+1.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/2f9494f7600729bfa00a0db96bd9349d
-OpenBLAS.v0.3.27+1.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/eae895a7ef4d9922bf9f6c454f56b2881fd5549e6c6a825e0e4d5b84defe9a97719a9f1e62f996dd545afdf372c1ab18bbee0a6cce8474d9adb2522b16678d35
-OpenBLAS.v0.3.27+1.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/8f30a26bd56ced5d6edc88b1fae57beb
-OpenBLAS.v0.3.27+1.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/ad849216c9655dc160a0cd756904442a80d693121e60a2b33876ac347c79fe6e3e602faad0c64a45599f5a5e203c3d9e8316c6b20c41d81e666b7650dccfaa5c
-OpenBLAS.v0.3.27+1.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/272facb48c295ccfea2a291869e1817e
-OpenBLAS.v0.3.27+1.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/7fd5c23046fa548f0bed6e7ce4f6fa809e56909b5595d2a4f348189ee99f234dc84989219ee63cdc004ce303b50fee2aa1fcb93589ff116a2191f8ef520d24be
-OpenBLAS.v0.3.27+1.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/c130634237846672f3a672f1d0e346d9
-OpenBLAS.v0.3.27+1.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/c174d00870ce3944c553122606cba7b78312342a02dc4833a91ae105f05d85d06e665e86a79452bdb7d2b31c18936582d79427ec3976048cf09497011d8c77c8
-OpenBLAS.v0.3.27+1.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/07c58a9399552e3b8362d9c1dd155693
-OpenBLAS.v0.3.27+1.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/98570a4dae80f9b4366c08994911efc87bf6967e63e20b486240a3b2d7637fabbfcca3fe8340ae4d9bae7702be400f5976fc5aa0020f984157b097b02e08d23c
-OpenBLAS.v0.3.27+1.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/25a9af724bb8a5ca42be6277a726583e
-OpenBLAS.v0.3.27+1.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/7afbc7453d1f22171523476e90882f67611374b03a3481bdb588722bc4816d081304b811a0dd452288ca972bea95bd2d2286644bda309dbe25fe721321298e85
-OpenBLAS.v0.3.27+1.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/2f9494f7600729bfa00a0db96bd9349d
-OpenBLAS.v0.3.27+1.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/eae895a7ef4d9922bf9f6c454f56b2881fd5549e6c6a825e0e4d5b84defe9a97719a9f1e62f996dd545afdf372c1ab18bbee0a6cce8474d9adb2522b16678d35
-OpenBLAS.v0.3.27+1.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/8f30a26bd56ced5d6edc88b1fae57beb
-OpenBLAS.v0.3.27+1.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/ad849216c9655dc160a0cd756904442a80d693121e60a2b33876ac347c79fe6e3e602faad0c64a45599f5a5e203c3d9e8316c6b20c41d81e666b7650dccfaa5c
-OpenBLAS.v0.3.27+1.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/272facb48c295ccfea2a291869e1817e
-OpenBLAS.v0.3.27+1.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/7fd5c23046fa548f0bed6e7ce4f6fa809e56909b5595d2a4f348189ee99f234dc84989219ee63cdc004ce303b50fee2aa1fcb93589ff116a2191f8ef520d24be
-OpenBLAS.v0.3.27+1.i686-linux-gnu-libgfortran3.tar.gz/md5/14cee2ac2cff0d9d8b614278e3f7a4ed
-OpenBLAS.v0.3.27+1.i686-linux-gnu-libgfortran3.tar.gz/sha512/d81aa1c8ff70d8d24d2cf88adc568dbf6a77f191332aa0298fbc0faad1fda855c9a6c278d0556003cca315ef75e47cf7caa6963b4e16f4d883ba7c1b13a298bb
-OpenBLAS.v0.3.27+1.i686-linux-gnu-libgfortran4.tar.gz/md5/559f96fb8a2a03df6689200173f2c1df
-OpenBLAS.v0.3.27+1.i686-linux-gnu-libgfortran4.tar.gz/sha512/cc1e987b2ad7d47b474d39b0f93ee6f6e46a4e5d0760cea9e31a0d3c5336e6cfc88401122ab278c0b745c9e60b290f9c05edf39bef9e7e97c70f33dc7afac341
-OpenBLAS.v0.3.27+1.i686-linux-gnu-libgfortran5.tar.gz/md5/c572b06af06609e5e84dc8aee61babc1
-OpenBLAS.v0.3.27+1.i686-linux-gnu-libgfortran5.tar.gz/sha512/d799e280600970697701098f76e79d0bb72bf55cbe8d6c131bd26f6a67bdcb5ed307b26eae89bc6b7cc6b6eb25d2622f952b315f7850b7f231148f14cc09b769
-OpenBLAS.v0.3.27+1.i686-linux-musl-libgfortran3.tar.gz/md5/4aa9f25b39088f79ea13aab1097c0c1f
-OpenBLAS.v0.3.27+1.i686-linux-musl-libgfortran3.tar.gz/sha512/126876d9de1c67302dc1b9b71a96fd2f5eb45745ebbcea6d4b7d4bdfac93088ef6b89e75a2bfcd83f1b32dc798b7ef824bb225e24e88e6443571d0576939bb05
-OpenBLAS.v0.3.27+1.i686-linux-musl-libgfortran4.tar.gz/md5/4ffd9c16cd3c6535457dd654f95c62e6
-OpenBLAS.v0.3.27+1.i686-linux-musl-libgfortran4.tar.gz/sha512/cc7fbe4949b5b51e5f1f5fdae537bbcc68ef4a59a02c290df2f6723bdeb52d98e699e4b23a879372d56279196295d8c938ba2221fe3a73cd1ef953059cdf694f
-OpenBLAS.v0.3.27+1.i686-linux-musl-libgfortran5.tar.gz/md5/7d6855b9a879259216c243dcfc75a2cc
-OpenBLAS.v0.3.27+1.i686-linux-musl-libgfortran5.tar.gz/sha512/221d1ba0250802ae88daac384fd1b2c911c49f8e141efbf3c2668260f4018c5e5f1e21c459a1595652ca48ebc446fe43e54fbf732b47d68f20ecb1e280862570
-OpenBLAS.v0.3.27+1.i686-w64-mingw32-libgfortran3.tar.gz/md5/646fdfccf16f12f23441723e13c12f58
-OpenBLAS.v0.3.27+1.i686-w64-mingw32-libgfortran3.tar.gz/sha512/2692aae16acba199584da71275eb609071d6f7a6d644239f9b6307fe12fc875d6267b11d387b2cace1d5866bf50ab0db619510d02acd3c90696bfb0dfe958037
-OpenBLAS.v0.3.27+1.i686-w64-mingw32-libgfortran4.tar.gz/md5/257e35006373e43fedb211c56b73315a
-OpenBLAS.v0.3.27+1.i686-w64-mingw32-libgfortran4.tar.gz/sha512/e4d8049a6e30763dbacba7646805bb72abad021f8810fb084a287d389137e30b96f12f04ad625c5ef322d127f7b603f388fee18a516e101761391d405ec58d2e
-OpenBLAS.v0.3.27+1.i686-w64-mingw32-libgfortran5.tar.gz/md5/68245d8b061c60f97f48fd4fde4492dd
-OpenBLAS.v0.3.27+1.i686-w64-mingw32-libgfortran5.tar.gz/sha512/511f5fcb538067c04742ad578d2584ebb3cc54bd7c43b84b14d3597bcb84d303a729a48c79018afa119ef12e084bed5ce6fe3591774a1cd6a5b6bbe5df4a8753
-OpenBLAS.v0.3.27+1.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/883728fe99e27d1f066032e1465880b2
-OpenBLAS.v0.3.27+1.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/3363ad953d1d7b5ba233b5d6ff65411e51189adcc6e7a9b68e45388132b38701eba53745f826f896820a98bc5015a8787ab1257f1a25c0a55f0437707c451d20
-OpenBLAS.v0.3.27+1.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/672fb00c47939bfc1893c7bf630b6904
-OpenBLAS.v0.3.27+1.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/e1beb8be0b58402df60b14a81d4fefe13cb0a30450717c80f2670b3a7947a89574848e858f90e0efd5474c47cdb86ce5623645988f05f105df206abd888c2f58
-OpenBLAS.v0.3.27+1.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/32eeeeeb57ed38bb4123ea793faf6685
-OpenBLAS.v0.3.27+1.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/bc505de8d8378e5c0fd6b3092b7093ecae0cacd9d5f6fa94e6e01ead03ffd7abad31c8d75fa84cf6da4f4fd33dde33df968595ecdc818f5b891b82db1be2d1a1
-OpenBLAS.v0.3.27+1.x86_64-apple-darwin-libgfortran3.tar.gz/md5/e49f4562399b5d45d987e9820774f7c8
-OpenBLAS.v0.3.27+1.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/0e5ade0c2112f01b3bde14ddb0fe500085d75fc86117d54bc66cc2da30f7251233387a90daca6203ebe457bc68e8bf3cff62c011b424a971ff9f7932974eaba4
-OpenBLAS.v0.3.27+1.x86_64-apple-darwin-libgfortran4.tar.gz/md5/26c9067086aa013de9c3b4001cd3f78a
-OpenBLAS.v0.3.27+1.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/e641bc045b96cb011183e26541730b46b8dfb401ef1223f10f19450de206d9971f3181d37c7687477d782238e580bbba4fddbcb2094a45761b55dcc93a9cacd4
-OpenBLAS.v0.3.27+1.x86_64-apple-darwin-libgfortran5.tar.gz/md5/a2cf4ac08dc296f6aaf109e8d1fff491
-OpenBLAS.v0.3.27+1.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/2210bc1dfa32b0b9b86ec84943b6673bc540a0822652274ececa0f394ed406d9f23f02909f2b8f97dd2a2bc114df2d0e9a6d868d29bc2d08a3da7176743a6d10
-OpenBLAS.v0.3.27+1.x86_64-linux-gnu-libgfortran3.tar.gz/md5/1b501f18b00d1e051b4af955da81b3c9
-OpenBLAS.v0.3.27+1.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/1b5615dc63efd0166b206bbdc90801d0c623f93f537c320bac1af8bf41f9e3ae8ec33eb6b43a7bd9dc2d9ba526bc7bb200ff828f33ef36da920f9290fa4ff252
-OpenBLAS.v0.3.27+1.x86_64-linux-gnu-libgfortran4.tar.gz/md5/079cebb72efd39454275a8199fc78c17
-OpenBLAS.v0.3.27+1.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/94bdd5db2546381e3cd15bb60b382c11d8ba879f8b88771a15d0d7cbf5a399f46aec60fc01e07258614ec039bf9bf73cbeffc9d2f29b03c9885e63704f0d2ab0
-OpenBLAS.v0.3.27+1.x86_64-linux-gnu-libgfortran5.tar.gz/md5/cef6229311f1616c0db95cef84725cd4
-OpenBLAS.v0.3.27+1.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/297eda815134d5de59d1614b39f06a512f4ef544dc5abaffa015075a8bcba1506aa4825109213e54e94401cbf16d4292a1ec2b9b71b278cc8536379d80d96e46
-OpenBLAS.v0.3.27+1.x86_64-linux-musl-libgfortran3.tar.gz/md5/b410edbbc651fd9f6589fda153b410da
-OpenBLAS.v0.3.27+1.x86_64-linux-musl-libgfortran3.tar.gz/sha512/718c22a940d998dcd8c754994f5a7d9bd3e3131d51beb1d8071ca0005e5c562bb2368924b0c4951839df0bc85a272962f87891b366a1bce1f735cc2b3495b834
-OpenBLAS.v0.3.27+1.x86_64-linux-musl-libgfortran4.tar.gz/md5/af8f2dc642041d5e4eff98d6b20e7596
-OpenBLAS.v0.3.27+1.x86_64-linux-musl-libgfortran4.tar.gz/sha512/48b88f703cc0e35d8f3b3bf7f395481a3225f5c3d1a4277a7b477815feab71df5c6b662313f4762bc8002f43c0f1bece0f383bc3920c09d383303b3927925ddf
-OpenBLAS.v0.3.27+1.x86_64-linux-musl-libgfortran5.tar.gz/md5/eba3e9322d39993d81d78486306b301f
-OpenBLAS.v0.3.27+1.x86_64-linux-musl-libgfortran5.tar.gz/sha512/dc11716e4f7a53a396b8b8cd3e506bd66272e9e8c5533199dc972c91fed0cea5067ec8e14abf67da2b53af7f3189eafc5c188657d617eea3f55ed248d7ed38e4
-OpenBLAS.v0.3.27+1.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/03f45c7c0276f58719235e5da3bcdc85
-OpenBLAS.v0.3.27+1.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/836fbbdae1065393de8ad1410301afbecfb0bf60256322b754e17aa5b4edb20e409eeca2f66f9a2b9ffb5872479cd3cab9b721bd2fc9c3544f5e90e78c7e59c7
-OpenBLAS.v0.3.27+1.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/dc6bc577a3ccd78364e9fcb98fec03dd
-OpenBLAS.v0.3.27+1.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/2c52880b287b0c4f48ed3539e4e0b24b6a05b46d47d7586eea7ca06ebc19c7f0d018fdd24e8da94249fa3b7dc54b85b27ebc530fc5cefb2d9b5457e00dee3529
-OpenBLAS.v0.3.27+1.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/30a6a329d4d37dea7199dfcf264a2641
-OpenBLAS.v0.3.27+1.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/53be21e7a94033cd44b8e2d375b38606446800344698e4f365527d807f736b7b2b9a897138b5de5bd62ba9da104cd6f86bf59caebc18299c0abd98899c527988
-OpenBLAS.v0.3.27+1.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/290a8fc0d1580aeed8cb7b793ff991bf
-OpenBLAS.v0.3.27+1.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/e30e4f666255982c1adbf73020bf88e2d499d2d26216a16c34b4a6b7ed0dc5b7d5374a978a7d0ef5735a82394b4ef06bd82491e2ddf7ec5775953b9183e9f601
-OpenBLAS.v0.3.27+1.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/1b46471021b6914fc60401e3e1ffe78b
-OpenBLAS.v0.3.27+1.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/509d5138d8cd9621937f840b7f73949facec2f047676069403d3d7c482ea183766dc84536d9c2a291b18a2b89902e6f714665fa0b7a920727635530a3aa4aa17
-OpenBLAS.v0.3.27+1.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/ae28948c5d496a3d0bba649c72822b2b
-OpenBLAS.v0.3.27+1.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/8d66a65040e973947a8a7d4f4b1d47d63a77b75c0d5e04843de9188256aeec5a1c7e0d59bf5e1d5c262c4f1a4ff2aa36599840337e9d828fb77724c38c1fff4e
-openblas-6c77e5e314474773a7749357b153caba4ec3817d.tar.gz/md5/4971eeb7adadee085d7c991db416fe7a
-openblas-6c77e5e314474773a7749357b153caba4ec3817d.tar.gz/sha512/7b85c9fb7be54407ba627d77897f40de4395d6d307230aa7df83cf8e0a41f545e4af4ae0576abb40cc9e0c385e1c6a488100dff292ea307439a89587c07ba66f
+OpenBLAS.v0.3.28+2.aarch64-apple-darwin-libgfortran5.tar.gz/md5/312aa603d089d680205dad7d5da58195
+OpenBLAS.v0.3.28+2.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/ffb0069561f52f8ac2f8affe937a00592e0c5d75c6d64bb0d5c93d1c925c93a46b763638031c88818b9dcef4a7b149ee3f15792a812e87f57a8ad086604164c4
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran3.tar.gz/md5/7c43d9e9ac07820130a3d5faefdef882
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/3ade0f098796148c37b118f9c052bad4e40431b4792f001043f040f8b1e4b7c3bae512f56ea21e6c0111246b2200e7720fe720a56a19dd11d1fba789344f29e3
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran4.tar.gz/md5/cd2fe87dac703c8bfa25406aa732b88a
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/2aea68bd8f1db2ac920951c8d9a47ce8c071f3736ee8aad8d185a09be25234a0ffd11b9f9640015b82770ba3b3fad9aa511cc43501c1bb5a3a44f1fb7ccd5692
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran5.tar.gz/md5/e3db2bf2f1f38aeee8530c78f3ec049a
+OpenBLAS.v0.3.28+2.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/a0ccb92e818650ac3cbc292d5af1a000ee9b123953cc3eb16e2479e926af3f2be0ed9858e3c0c1075b1b9dd70ec1e51b9dce2c9d45b999d296aa050d257a3cb1
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran3.tar.gz/md5/5bb605738930037259e773ebdb4a7041
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran3.tar.gz/sha512/967e0f33be7b743d9617627a947a802286962a46c7c3b2418aaa1504cffc5f311b01e1702b35ded18ae3686b1914c6085213b03fa8a51e0a7ca16dc4cfee8504
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran4.tar.gz/md5/ce175e82b9c6597c546552e79a43f934
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran4.tar.gz/sha512/8ff5dff293d9786fc4f541b209b35afcbe325c13ddd0f9c8f9bfca8ba5c318c7890152260a5441b9e9088751ce03b1ff8f0f5d6fd4f142fae34bdb7390d1952c
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran5.tar.gz/md5/cae6aabbdccf31fb78b234785b52d48a
+OpenBLAS.v0.3.28+2.aarch64-linux-musl-libgfortran5.tar.gz/sha512/ac842023e5db243fcfada22adca051bd2ffa04fca496454539931eede159e5d0490d444c338684c2d178c3367b23b8f3d76c544e30f1897bbed181f56237619f
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/5d1f45f53dd1730051095fb8e027b14f
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/0b1f91e86b5078b7cd6b64bc429a0e63bb5adf28df1baa336e67819fbd2c09f59b643c39e580f63e3bbccdc631c5d5e14c7d8afa6af94250453ce5286958f90f
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/8b3e3ea928975c575798d47466aafb82
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ebac0f7047dd8b97d85e4251953a23824701af02754afd6808f13eb276326b30eb292c85fa717fbd2f21b929e6a9816a012b8ea378a0fa27e671f81435f5d3b9
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/5aacfce96d5673b4d8341cb097d22c4a
+OpenBLAS.v0.3.28+2.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b84dc2b8cbe5453555182c3fcd8624d7a2b28fe3826d54fde3b77ad2c33e60309317d150f07554dd85e168b0ac1f91537a5c2c17fff9c02dd9216f01161e4965
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/dfeac22ee204868cf254dab5ae79382b
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/710117eb7400a0aacf69d6053730eb3b3ff4767f8d38defb2aaad94aebf1646a794489e78a8f46b469901159cdca73dd2b9460fff11e95daa4a2642cab721a25
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/13ff2a40bc55839bdef76b796db1eb76
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/eb61fe6c0221e8f9d7a626b8d088ae1497155341dafb69835e7d53af76689ae212e1e4621e0729df5d896888c0b2d7354a24f7b57fe1d68f0b35c26bcf096699
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/aa7349724ba1d47256705777e755289a
+OpenBLAS.v0.3.28+2.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/25ab56c44b7d0d5de17344f39071e6894e878e89b5e35412a3c9fe345abd2eef76d7816cabb6407c7c521c3bf67a4741b37ad7e580962ead9275273e431f1fb3
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/5d1f45f53dd1730051095fb8e027b14f
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/0b1f91e86b5078b7cd6b64bc429a0e63bb5adf28df1baa336e67819fbd2c09f59b643c39e580f63e3bbccdc631c5d5e14c7d8afa6af94250453ce5286958f90f
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/8b3e3ea928975c575798d47466aafb82
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/ebac0f7047dd8b97d85e4251953a23824701af02754afd6808f13eb276326b30eb292c85fa717fbd2f21b929e6a9816a012b8ea378a0fa27e671f81435f5d3b9
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/5aacfce96d5673b4d8341cb097d22c4a
+OpenBLAS.v0.3.28+2.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b84dc2b8cbe5453555182c3fcd8624d7a2b28fe3826d54fde3b77ad2c33e60309317d150f07554dd85e168b0ac1f91537a5c2c17fff9c02dd9216f01161e4965
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/dfeac22ee204868cf254dab5ae79382b
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/710117eb7400a0aacf69d6053730eb3b3ff4767f8d38defb2aaad94aebf1646a794489e78a8f46b469901159cdca73dd2b9460fff11e95daa4a2642cab721a25
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/13ff2a40bc55839bdef76b796db1eb76
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/eb61fe6c0221e8f9d7a626b8d088ae1497155341dafb69835e7d53af76689ae212e1e4621e0729df5d896888c0b2d7354a24f7b57fe1d68f0b35c26bcf096699
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/aa7349724ba1d47256705777e755289a
+OpenBLAS.v0.3.28+2.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/25ab56c44b7d0d5de17344f39071e6894e878e89b5e35412a3c9fe345abd2eef76d7816cabb6407c7c521c3bf67a4741b37ad7e580962ead9275273e431f1fb3
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran3.tar.gz/md5/53087cc770708c57d2654fd0095b64df
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran3.tar.gz/sha512/90961448ae40b0445bf881d0815aec54d2096ad235dc8e3db8d698a72961ef9a97e7fcd08f79c83cd1f7c5a341464f52a90351d927d5f1c3e9c8ee32b17970db
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran4.tar.gz/md5/ee910e19faa961bde11fdf90c211df9d
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran4.tar.gz/sha512/f5cfecfe965991cfd7843eff71efa71d6842058565bb63657e909b2942e58a8c7506aa66335308961e59f392da16e1177d79542ae509795566a14122f67a1782
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran5.tar.gz/md5/fe52ba7ca8e16f37aa04b79248e0471d
+OpenBLAS.v0.3.28+2.i686-linux-gnu-libgfortran5.tar.gz/sha512/79b5108886d60f12424709a841e359dc1cf23cef21bb0ee6d1a48043ac48a35dac1637e43c8ebf3f2e10dd34721993a7a12c5776f2975dd5bd7b6e29e1a9adc3
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran3.tar.gz/md5/88d8ff421d29456f1d7670ceaf8867ca
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran3.tar.gz/sha512/91c1bd8142845d11fecba87a719315a14218e3863955ddd2ed82cecd4a2c177a48c660b6aac374ee9a11008245c0ced1bae70eaf5a1a6e3114db02e09a96396f
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran4.tar.gz/md5/3035066a53032b551e49f56b323e941d
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran4.tar.gz/sha512/f218e152a1c92bd374599814612add8010aedc78113cbe06465e8a1ee7f66892bb654cad687aa55555e74f3a65d74608692d41c9f0ce6c0bc63475ef62ab55b7
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran5.tar.gz/md5/f7cf36ac9a0cbb535952ec73f2e6c9ea
+OpenBLAS.v0.3.28+2.i686-linux-musl-libgfortran5.tar.gz/sha512/00ab052d9fa4a72a640545782019f24ed6017b36aa89c5e659ce73b1e821817f560c09f71b26c027c0a05bd13567c71a6d7f5995d1c39ab233bec56cd3a7fd9e
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran3.tar.gz/md5/b65414bb15539e5aa2f5f1c7984edb94
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran3.tar.gz/sha512/847ada020bb92fe6ea81dfffaf855707a529c9c0f7e246e802b9521e5c7d4aa36104d04279c09a905a797184cdf05a6fabf84711b7661ecb14e9ac2fba251f61
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran4.tar.gz/md5/0b626ebb8b3fc49b946723a9a2a21a91
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran4.tar.gz/sha512/b5bba23878399fc1ff20abc2e2eb4acb9691ce982f290e33384732452774a0b447bd0fb01ee696d10ad8b03d99eec905662af92bd3b499d9fe6db419e05d2573
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran5.tar.gz/md5/cb99d7d4944c5283a1a0142683e1d377
+OpenBLAS.v0.3.28+2.i686-w64-mingw32-libgfortran5.tar.gz/sha512/b77d3225e60f49506917bfff78c187df7157dbc834eccda2fa03d03eef8214b225682888a411a8b6e4b29a8d7e2b0ca625ea8c56b84ecc39e1f4f1012523c096
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/c6e5d4867a068e08b3f56f474e498b81
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/de6249439758a501bfd27d3ef04ec04cc06edf64de73f0709a6a40a2eaf40bd3d5d77dfd54b7b19e2f6bf6c104b4416d3e225faa0cff4cb631785c08d90b8614
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/32e70466cfa3cfec65ab4cad3abc5f03
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/2642385a5e9fc8e9c3839a5a44f9753b21b5078725f7d0c3e1ebe96b76129a3b8e2627d92629dee4f6fd7e8e51e86a7fbedc80cbe4d1a6812cea363559950da0
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/e2332831bd88d57132241697952819e7
+OpenBLAS.v0.3.28+2.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/ad03edf9ac56bf6311f0ca70a1bc359242accfe82cba9e42f39f6cb1c3006226179ff9be8218847889cae10fac13bc33f60837e1e3249e309172da7fbc25400f
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran3.tar.gz/md5/27c24775af446a44a72a28ffd197696d
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/2af8caa33bee88efff84653f3932b04e8fd4aabb1bf16d49fa73657b0ec13c9457fde7ab3f953fc9b01da5c2841c3c9b588e3b0f559b89df0e6268468d1f7cc8
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran4.tar.gz/md5/414e701d918d5fba08a12de6979db4b5
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/949886d388b80e19b944d102852f2bb58ffa03c42e624986dd9dc076797c996634d4a8fc0f04544451d6848c2079969816979e1f68a999b2747e9dd5472be7a6
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran5.tar.gz/md5/29fcf62c0280cc10f91d22189a2e8de8
+OpenBLAS.v0.3.28+2.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/02e75d4ecf9cd922157a72c0ca2e713cf336b125df3982cd5f7cc4f2a04367ad4c2b1190ca2a0a9df8b639c7ebcfc9783066e99dd0b13acde7b02038391e8567
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran3.tar.gz/md5/147d5e8eb2ec78fc8a31bdb091fab001
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/2319eda568800c0b1f2d96a8a36c59b1bbd792c06de1d740aea3f1e49798242426ea8d10c100c42c3c281702e2b4f5b673b6ab5252b276d48542e875bcaa3094
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran4.tar.gz/md5/448857d9c4b2e95afc12a14c75b24055
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/3e7c8cd55e0b15a30992b1e0b48a6e2ae36fd9babf689fa5595c7de94aec401de1d7821d45a22bf14cd5c45c708bc8fa3511d34d732dadd4daaca3f49e200bdb
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran5.tar.gz/md5/3aaf417685b44e0e505208f7b31b981a
+OpenBLAS.v0.3.28+2.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/f7b1d123e48ede93fe624a79d9535a8915bfa3441d7a6f9c6643467027414c9f2538e299858ea98bbb49d4e6d385a6a491063cb1878ac3b0b3d6a8f7ff0a48df
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran3.tar.gz/md5/5723136deaaf4b2e5960fb0774943288
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran3.tar.gz/sha512/127ea8b2b0d8d4586a23a2b8ecbf148d512efe68626e89b0688c3c9e29ed9420b45ae86755c1467313c565f9f3835762051d7086a815b813dbe6e9eb05fb4be1
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran4.tar.gz/md5/80b1b9cf5346916edda653174a987aa2
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran4.tar.gz/sha512/77e1387ec969bbed4945d2a598a1cd04d258265c4b2d5c43af92118eb32e0c69e40619a20ea1835f277febcfea068b241343d44932afef832bdcfd2e9f618f0a
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran5.tar.gz/md5/44dcedf01c938d1a1c67dd3bc90ab61d
+OpenBLAS.v0.3.28+2.x86_64-linux-musl-libgfortran5.tar.gz/sha512/e490d49b8d41d73ab3e71aca8c691ca58704f0fc6930cbfcc203f97b8db8d83144bad597a2c53ff0c0c4f7c40316d975a1b589a3603873d508f6beeb75970c5b
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/0e8a7e88b54cb836292c289d1c456fa9
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/0e9b3af6839b9c41c950bb4d8b739f0243a890af7092ef9f3a00e4931f2acc3820afb78e40c7bfef716dcd3230c1d0acc7b0b37f30eb47441b476bd7540745e6
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/5fc47ad55780c99ef9cab7ef1b26d9c0
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/c531201e4abddd652efeb5801658f5c1e4891578f181e99d6e41fc0d3bc6347b82e5e928ff8a717ee1e75bb0a6a765260bf7c99fce44aa24c21f1c5a5e3c1e3b
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/dc127f3ab984b5d47b325d5701ab73cd
+OpenBLAS.v0.3.28+2.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/50850911703320894a2e1e996c5de4613b5f9e3012f5cbf591f3677799599c45d9cc4c42cf310bdc6ba91ef550e52f6424bbbabdf47f96748d4669d94e6b46a4
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/937847e2ad00539f3422d1ecb9d26d55
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/751d889661ddd46cd5718b49e34f826a4fb34b1b992251a5a975bc0af15b74a75d8a56f403e8fae570223477b2b8927d9cb36764e4b9e466045d5f317b8e7196
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/180c54c50362d05696589b270693ee8f
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/2e3b76be5b7c4a7dc45f07e17493abd7ef9185e92429d8fa4d38766e0da96dd0777b619a9e420d2e1142bdab2ae1f755f9bc9ad97ee9a7927741778f89b9135f
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/2f0fac7c96af66ea63fce26e409f4db6
+OpenBLAS.v0.3.28+2.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/141522971447c38b4908342f3ad09ffb18142d2e79b44f66fd80047b44c09216c9b94c39f776e3093f9ceb6bc4d6270cbbfb4209b2fc0debfe93e7145cb4dbff
+openblas-5ef8b1964658f9cb6a6324a06f6a1a022609b0c5.tar.gz/md5/f7a1fe86cefbf7d4f2608843c7833ca7
+openblas-5ef8b1964658f9cb6a6324a06f6a1a022609b0c5.tar.gz/sha512/5f6020e958967a12a3c5b18bde13331f9c0602bd073563f35cd7cec848c92b45f30ca362819b12cd16989c0e4641ee3e63db8322d1092f61b31ba2e4068dd7a7
diff --git a/deps/checksums/patchelf b/deps/checksums/patchelf
index e2029b83f14fc..6392e44d8f2e8 100644
--- a/deps/checksums/patchelf
+++ b/deps/checksums/patchelf
@@ -1,2 +1,2 @@
-patchelf-0.18.0.tar.bz2/md5/9b091a689583fdc7c3206679586322d5
-patchelf-0.18.0.tar.bz2/sha512/bf26194ca3435b141dd330890fcc0c9d805d0ad6a537901dabe6707a13cd28e7e6217462f3ebb3cb4861302dd8632342ec988fc18246c35332a94f2b349d4f4f
+patchelf-0.17.2.tar.bz2/md5/d76db4f1a27b0934d0b0d0585b081c0f
+patchelf-0.17.2.tar.bz2/sha512/8277adf95513f88fb190536a38bdfdf438a4cc7685d8a130bdffbe064441f0f25095b6c83bbb190133e1a138963776d15b46c247dd2f1a073a1bfe1d1dbdd503
diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse
index eec27cb539d0f..acec99b39879c 100644
--- a/deps/checksums/suitesparse
+++ b/deps/checksums/suitesparse
@@ -1,36 +1,34 @@
-SuiteSparse-7.7.0.tar.gz/md5/e659373ed5e9b961d2fcb6d67d250783
-SuiteSparse-7.7.0.tar.gz/sha512/aa62dae81ae423ce7162ae83b46e5cf606d95482e6c6bb7ae6d61e15987761119d9418ef3a96648e6ba2327871a2847eef8ace197aa375279d71c80329d6f451
-SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/md5/46541001073d1c3c85e18d910f8308f3
-SuiteSparse-e8285dd13a6d5b5cf52d8124793fc4d622d07554.tar.gz/sha512/f7470a447b934ca9315e216a07b97e363f11bc93186f9aa057b20b2d05092c58ae4f1b733de362de4a0730861c00be4ca5588d0b3ba65f018c1798b9122b9672
-SuiteSparse.v7.7.0+0.aarch64-apple-darwin.tar.gz/md5/276f7355e36eeab2911a141e5570dede
-SuiteSparse.v7.7.0+0.aarch64-apple-darwin.tar.gz/sha512/72aa979c3a4f6d2fa65f4d16ab106a7b306f5e84da91bf04a7a11bd863f71a8386ca5248b7e3fde83347cf912fae8ec3c87617db09f6bfadf12c476061855d28
-SuiteSparse.v7.7.0+0.aarch64-linux-gnu.tar.gz/md5/4c3ab9c8c451198420516bd84fdd079f
-SuiteSparse.v7.7.0+0.aarch64-linux-gnu.tar.gz/sha512/7afb088a9b117f79531d828a458419e0e8901daa635eeb1b5c753d60c26784496095f2bf70c5c3dedfc5a1c8dd04c56cd8408667fedcbd06abcec0a41a1171bb
-SuiteSparse.v7.7.0+0.aarch64-linux-musl.tar.gz/md5/e12af599488fa7578fb8f2018969f4c5
-SuiteSparse.v7.7.0+0.aarch64-linux-musl.tar.gz/sha512/c9e1c2938754dc3b7704e373f36cc876b592acac06c945860958e56f26e09b7be6ce58c4a9184d3528bcc1458d1f7ab9bd605b9a11083419e849e9fa2cc93f2b
-SuiteSparse.v7.7.0+0.armv6l-linux-gnueabihf.tar.gz/md5/a3912a6af26ff19d3fcd166d8426f1ff
-SuiteSparse.v7.7.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/5f724f5cfb526f2db7d184976f1711f09f77d548593ef9c28ae98a15b6927303864535761929fcd729448d9ece8a7f599cf82d0a83a7668966bdd8b6b62b641f
-SuiteSparse.v7.7.0+0.armv6l-linux-musleabihf.tar.gz/md5/24ab4184bf83e59e029cf950be56f1c5
-SuiteSparse.v7.7.0+0.armv6l-linux-musleabihf.tar.gz/sha512/9f1b05c48b051b3c0440e7075f84105a5c5e8e2c8685d93fac847e1cbbf5427ba623ecde16d9b2293b0c286326bfbce07f8d2906a892065fa9fe3d36a4c0386b
-SuiteSparse.v7.7.0+0.armv7l-linux-gnueabihf.tar.gz/md5/8433d1206bc72053c1936a1e5f76ea30
-SuiteSparse.v7.7.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/d5f3249db8bb3a4f216d3abef0416e090c1b4d0a847d814df03f3585159602a31b8e4edffae36c3cc39b5c79691c15d51a085b746f03b86d9a0a9b18d00332d9
-SuiteSparse.v7.7.0+0.armv7l-linux-musleabihf.tar.gz/md5/8651a96c9b5617287c917b07d9f6fb16
-SuiteSparse.v7.7.0+0.armv7l-linux-musleabihf.tar.gz/sha512/3e3f21083a8cd26919d6592be41f531ce4293a9e05a84d5298a4d6c3c222892d6d364c30c75558a1461020ac5446fd51e88a333d03118d74eb28ea33a3386d3b
-SuiteSparse.v7.7.0+0.i686-linux-gnu.tar.gz/md5/184c95889dfb07319b9ee51e2ff12d0e
-SuiteSparse.v7.7.0+0.i686-linux-gnu.tar.gz/sha512/5424a42418e033c67e0868dd7878990158a9f099f2e3ed04aed45c6ceff0a828080df6eae004e10a3784136f66ac13da46df0b3bb3c96fc32c7bdf02830af41f
-SuiteSparse.v7.7.0+0.i686-linux-musl.tar.gz/md5/0bde5fe930ec4e2e90945b6bfd78e8d2
-SuiteSparse.v7.7.0+0.i686-linux-musl.tar.gz/sha512/1ff4c8e578146cca72c1bd74cddbba5999053e5729fdb217b0e4f1c0d5cbcae5a73f466e72a52e92979e5f8cc2549b1c5222c7ca32b628db0b71e129a2d22714
-SuiteSparse.v7.7.0+0.i686-w64-mingw32.tar.gz/md5/5439e41ed1909ffe4ba28669eb45ef43
-SuiteSparse.v7.7.0+0.i686-w64-mingw32.tar.gz/sha512/380999433f0a2c1d65a1bf6ea48da60e6cead831cfc31ab3df0ba122afbc32b2e14fb3d8d578a909b9f39f2763923816a691863756996ea064a595e58a788b98
-SuiteSparse.v7.7.0+0.powerpc64le-linux-gnu.tar.gz/md5/ea08ebbd5aaae629a194450c25a77d2e
-SuiteSparse.v7.7.0+0.powerpc64le-linux-gnu.tar.gz/sha512/cfe6675e6a6b7790de8a6a3de2dbf561770fa63113c66890a3f888fba71e20c77edaa89b23cdf0038f3a870be9bd5e351aa84b774e7da833c9c0c90e05c0e9fb
-SuiteSparse.v7.7.0+0.x86_64-apple-darwin.tar.gz/md5/314a033b51d6d239e29a91fcca911260
-SuiteSparse.v7.7.0+0.x86_64-apple-darwin.tar.gz/sha512/77147381738484d147ce529b4e9d3dff9bccbe5ed07071b5df647a785f118e46792f739f145d597ef78c871d75759348109ad3e08125fb58dd12b8a6813a8fcc
-SuiteSparse.v7.7.0+0.x86_64-linux-gnu.tar.gz/md5/f62f17fc50b15e0a4a117f77c52b35f3
-SuiteSparse.v7.7.0+0.x86_64-linux-gnu.tar.gz/sha512/0ba022a5d0039b1348a09521cc2bd366df8c6603a7d3de4bf7d1b15504add8607bf5fa2bcf7d95b2b48cb676c17cc516903323615b6a668e53310363a3f6b242
-SuiteSparse.v7.7.0+0.x86_64-linux-musl.tar.gz/md5/d9b77034590bb0511f2ea2d726303f94
-SuiteSparse.v7.7.0+0.x86_64-linux-musl.tar.gz/sha512/a1149ec6f50b978b1bad91662035d8d131d431459e1910b2cd9fe0307f50d23ca15148f1af522db04327e8cc9cc7c04f85852ddb606ac82fa346b4ab70d28752
-SuiteSparse.v7.7.0+0.x86_64-unknown-freebsd.tar.gz/md5/7b7f00672f0880e397a5182da084c334
-SuiteSparse.v7.7.0+0.x86_64-unknown-freebsd.tar.gz/sha512/06696d78cd7e385906e2fbfbd8ec804de5a4a3d8134d30bc105f713eb915742204e4226229b33a93740f30a3ff24d48dde651e64a78bc6d937e84ce484f6dd74
-SuiteSparse.v7.7.0+0.x86_64-w64-mingw32.tar.gz/md5/91b2e33ead8c2898881475ddfe202987
-SuiteSparse.v7.7.0+0.x86_64-w64-mingw32.tar.gz/sha512/cb5f2caff872ba2ab66f1285e264b4c28ec0a05a4a0fea3964c22aa167195b57a9d9de2c9b9289438459c6b1c1b9f047807414b3e1305e87642edabd22973bd6
+SuiteSparse-7.8.0.tar.gz/md5/ad42a80d28bb56a1fce15f6e7332e04e
+SuiteSparse-7.8.0.tar.gz/sha512/91aff0aee26e938ba88a8f92db15b0db0ecc6ada3b60153bb299f53a45ccda131db4bc66f890c220034c900180d0bb3a5fb3e2686fec7d6174f5900a3ee64424
+SuiteSparse.v7.8.0+0.aarch64-apple-darwin.tar.gz/md5/38379e14a53663a9c23f32ed56801676
+SuiteSparse.v7.8.0+0.aarch64-apple-darwin.tar.gz/sha512/3f2a7aa7778a22d150bad9ecb8d03edfa75707a07545e65660c8ccc4b0a9fb058ccab29e21e4728741d40d390d28922d521d3841e16258cf8e26acacadfc1fbd
+SuiteSparse.v7.8.0+0.aarch64-linux-gnu.tar.gz/md5/bc52c7df0a442c0fb9aafb83d60878f4
+SuiteSparse.v7.8.0+0.aarch64-linux-gnu.tar.gz/sha512/436e79ea0774d6ffb571b513e385ef48d9cc70b72010cffdc23d606ad6c8984c8b49e2422ce8881def0722f3f608e4ecb87e6752dd80cf7988addd330c5ded13
+SuiteSparse.v7.8.0+0.aarch64-linux-musl.tar.gz/md5/87e4c2588efc39723621ac5010ddf2e5
+SuiteSparse.v7.8.0+0.aarch64-linux-musl.tar.gz/sha512/17115826716bb48f16e4593941be275d47012d112e54d8826c75fde119ffc9f66accd02353b309365b59779d7af3ac220f31ab7cf7eea165b209a93ecdc4102f
+SuiteSparse.v7.8.0+0.armv6l-linux-gnueabihf.tar.gz/md5/b1490603aa129942d8e4c9581853cd0a
+SuiteSparse.v7.8.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/e23c3532784e295ae72b811d285c3729c3f8ac1b5ee1621e831b6b2824a5b357e4bfa49e09174de7763fc3ebcab6b84ef16536bc1cf6f4bc0543b1b229209178
+SuiteSparse.v7.8.0+0.armv6l-linux-musleabihf.tar.gz/md5/f8199358882f76dd30bcce741b837de1
+SuiteSparse.v7.8.0+0.armv6l-linux-musleabihf.tar.gz/sha512/2c8d4ec21bfe253d3d32a5f5f09601b9b2864149f63f53067b157f5f7315fb04236bf5b19a1e5b4569e2c73127dcbb1703d56c7d06fc3ab9ae155902b7a1c2a9
+SuiteSparse.v7.8.0+0.armv7l-linux-gnueabihf.tar.gz/md5/cc3aa1a013cc91e7076dddf20fba9f60
+SuiteSparse.v7.8.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/a6b8cfbc345a089f12e55d8d44061dcce30f94c2d79fc520d6c5dfe433ac2e362d049fac72278cb59d4b3760ca08d5e350b7e2658fa5e8c77ce8608f67c2c4c4
+SuiteSparse.v7.8.0+0.armv7l-linux-musleabihf.tar.gz/md5/0d7797d31c30c53bf219cdc0a48e64dc
+SuiteSparse.v7.8.0+0.armv7l-linux-musleabihf.tar.gz/sha512/a7df8938ee6a04f62169bedd29c8408951cf33a43e0f529fb4d1e360bdad6462a50b2af297adb5f51fd726e1ced1fc8fcda7feeeafbeb44000bfe02a8e29c29e
+SuiteSparse.v7.8.0+0.i686-linux-gnu.tar.gz/md5/e48fa3d2e00f210e964c21e4ff27efae
+SuiteSparse.v7.8.0+0.i686-linux-gnu.tar.gz/sha512/3088c2af476285eb8549cf6aa56381156d49513a274348f86fbf01aa9ce0712961471f83fa50b261f3f365a302b88eb20ef0bb35b58c07a2cfb5dc337fdb72c1
+SuiteSparse.v7.8.0+0.i686-linux-musl.tar.gz/md5/e55202dbeca107a0c25a4f09d5d68915
+SuiteSparse.v7.8.0+0.i686-linux-musl.tar.gz/sha512/0f4de2e62016914b4d1bcb9b13bd8cb2bebefc5f0a532e103948b9aae79a20462ac7b74a3e968d4f99076c37dbbafb747699cd151e831ff89d297f78478fb84f
+SuiteSparse.v7.8.0+0.i686-w64-mingw32.tar.gz/md5/cb971bc1042196e527f95015c8bc5ef8
+SuiteSparse.v7.8.0+0.i686-w64-mingw32.tar.gz/sha512/d445a7790e3ac5392f75c9f4ec30cd1c812354b04388b4c6c6cea2423d2f0dac7173b17a8a2b7a7f4af10321601f96819a7702f9beac0397d85916d99493bc39
+SuiteSparse.v7.8.0+0.powerpc64le-linux-gnu.tar.gz/md5/12058f122b548a37070770d1847f3ce9
+SuiteSparse.v7.8.0+0.powerpc64le-linux-gnu.tar.gz/sha512/f375feeb8448ea90ce8d9f31c7e1230f6868316f06094ba0155069dded4f8da2e1b54d462ef9cfc77abd76147740d4066236dcf1fcea91f8a7141819962ad0ae
+SuiteSparse.v7.8.0+0.x86_64-apple-darwin.tar.gz/md5/1bd473f2a25f1ebcea8acc858e2594b4
+SuiteSparse.v7.8.0+0.x86_64-apple-darwin.tar.gz/sha512/034af137deee5bf0ebf3746745d09ad50ce135cd4768a2049bb9811478ff90e6ed8e2c990e277b4c3b38a3a5e9eaa856938eb86239ca445fa64b6dab6af7e996
+SuiteSparse.v7.8.0+0.x86_64-linux-gnu.tar.gz/md5/c58a86d9f25e6705941105d9e41f084c
+SuiteSparse.v7.8.0+0.x86_64-linux-gnu.tar.gz/sha512/56447062802f01815ffb014624423c6fd3ab6e16b642b2fe37972a151b02865965c95ca3d1a455c6d51cd31633aea8a732b235b55d68e6779c17b293c488fa43
+SuiteSparse.v7.8.0+0.x86_64-linux-musl.tar.gz/md5/ba6e10ba61c209df94f18ab51fe2dd90
+SuiteSparse.v7.8.0+0.x86_64-linux-musl.tar.gz/sha512/3b8fc504cfb4a3b628d5b955a482bad08c85e09e529f833855a84b847721247aaa469f96adef6b218a1ba5896cde91664cc819ba33115e3cc309e72140841ca3
+SuiteSparse.v7.8.0+0.x86_64-unknown-freebsd.tar.gz/md5/a50c69142a42c14edac4ce94b86b138a
+SuiteSparse.v7.8.0+0.x86_64-unknown-freebsd.tar.gz/sha512/963be0dccd1a594df08fe5135ef4ac13e1d707841c3e97d31ba5477d0d6ec26bad9be1c52d9fd78f199740a53950353adbdd767469f3bf01ea1e3ee843eb6c1a
+SuiteSparse.v7.8.0+0.x86_64-w64-mingw32.tar.gz/md5/7ca11ba89bd09183cc5a9320d6e8a4a7
+SuiteSparse.v7.8.0+0.x86_64-w64-mingw32.tar.gz/sha512/e1d5def1103bbf0bb29c08cdd3bf21ba60456353694985c66f8e55a31d54a32c5b891e56e1ffe30f9e1223c49283d267e483e2f1b999f566099c239b3eed1d78
diff --git a/deps/checksums/terminfo b/deps/checksums/terminfo
new file mode 100644
index 0000000000000..bd971e72b1be8
--- /dev/null
+++ b/deps/checksums/terminfo
@@ -0,0 +1,2 @@
+TermInfoDB-v2023.12.9.any.tar.gz/md5/573d9b5adaf6af500e3dfae6e3d15ebf
+TermInfoDB-v2023.12.9.any.tar.gz/sha512/e0a5bfe54346f9d5690a840628b329f6fac7375b0d29337bc70813ae3553a72bb397f8034d221c544289e40c4cfc685d5805777b7528f05bbe0123b5905c24a4
diff --git a/deps/checksums/unwind b/deps/checksums/unwind
index 7ef31e6bda06b..317809053abeb 100644
--- a/deps/checksums/unwind
+++ b/deps/checksums/unwind
@@ -1,26 +1,26 @@
-LibUnwind.v1.8.1+0.aarch64-linux-gnu.tar.gz/md5/e25a186941b2bedeb4a0fca60b1e5d1b
-LibUnwind.v1.8.1+0.aarch64-linux-gnu.tar.gz/sha512/4b488ef13b1b09d37dd2d2f62647e6407404730beb8cab58263c2d8e9db3716bfdb8949eca8ebb126eb22a3fcd81deb7ea0774fe7527ba7374f76047fe03abd7
-LibUnwind.v1.8.1+0.aarch64-linux-musl.tar.gz/md5/75fea80870d951a5e87d37bc67e52cfb
-LibUnwind.v1.8.1+0.aarch64-linux-musl.tar.gz/sha512/efb54577cddaf5e7930b15cdd98ed88e4d60ba3a1fe0097b2a64a868f92177985c71a86cfb40475976005ab55a01401960afa9c20649b1e34ea02ef262caa046
-LibUnwind.v1.8.1+0.armv6l-linux-gnueabihf.tar.gz/md5/30f3077b185f6e51b8b6ddfddcb8effb
-LibUnwind.v1.8.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/524810edbcfcba4938cb63c325905569b7d232dd8b02856e5f1592d7e36620c3ee166c0c788e42a14abc281c41723f49563f59d8cf5175ae1c3605ec29a97b9f
-LibUnwind.v1.8.1+0.armv6l-linux-musleabihf.tar.gz/md5/087d263a8edacec1b79d4eccef03ab53
-LibUnwind.v1.8.1+0.armv6l-linux-musleabihf.tar.gz/sha512/bad2bea6f98ed9e0ac293ab3cd7873d2c164616bd09103ad773300da1875e28ac51744809629d01b69744c610d93c90cc48ec4c81411b5d3f036db86e098adcd
-LibUnwind.v1.8.1+0.armv7l-linux-gnueabihf.tar.gz/md5/218f8a37d910bcfaba1bbeb9f61593a1
-LibUnwind.v1.8.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/1912b7aa4bbcaca3facad13bf9a8a8b4bb42183b9c542c6b51f0f4a715c27b7583dcf36f49a1fac9787ba7b39728a5d1a151661a570ef637d1080c11d5426fc4
-LibUnwind.v1.8.1+0.armv7l-linux-musleabihf.tar.gz/md5/c2582785ca7dc2edbc529a93ea0f4120
-LibUnwind.v1.8.1+0.armv7l-linux-musleabihf.tar.gz/sha512/ae5414a274d973623070402806eb279dd2ab708c801fa7f24ba9b8066e7fc13ae9ebe1f331f76dd54a4eba572e87117c57d502190b63978af87d7fa35a011632
-LibUnwind.v1.8.1+0.i686-linux-gnu.tar.gz/md5/324ae0c4916a435a6746ca77a1034b58
-LibUnwind.v1.8.1+0.i686-linux-gnu.tar.gz/sha512/fe5ac30e6cdda9f99c873a7af60407c5f1ca1d17396ab46679df56093fea37289e802dd53ed083a4963f7439a1887b4d401a9ab489bdeddd2d003b761af84c1c
-LibUnwind.v1.8.1+0.i686-linux-musl.tar.gz/md5/0495beea1d8e5e4572f32830125cb329
-LibUnwind.v1.8.1+0.i686-linux-musl.tar.gz/sha512/3db7f9241e11e139f02239826a65f40d77d968aa7dde574cf91759706dc9a5c97fb055b34ec011f9ac085eec121c3807e9c873773d1ab091a5a7180200ea73ec
-LibUnwind.v1.8.1+0.powerpc64le-linux-gnu.tar.gz/md5/1f0feb7cced4b847295dff4c1cd0dde1
-LibUnwind.v1.8.1+0.powerpc64le-linux-gnu.tar.gz/sha512/88707b4a45e3de2901a343f20a35d2003d24db6604a5194712a3a687299b98e7507934a1bd4d7a21f84f089e0378964334c483f10311dd1bfbaa5d8b42ab9f76
-LibUnwind.v1.8.1+0.x86_64-linux-gnu.tar.gz/md5/a03c84494c04ba08fa7e314584d28945
-LibUnwind.v1.8.1+0.x86_64-linux-gnu.tar.gz/sha512/eb97ec8cf03fc5cb77a6218fcc4f1ef1266e66a774dea34e1d1fb7f89c026287bb4bd09de0b61a83b42495b8b4d5be475a61b4df68c83bfb33be2145ed659627
-LibUnwind.v1.8.1+0.x86_64-linux-musl.tar.gz/md5/194654cfd8d202599b7096783659c0ab
-LibUnwind.v1.8.1+0.x86_64-linux-musl.tar.gz/sha512/f39f8d0488ec02d9693b4a17ca73ec683ea062cfc67400d02e1e38bfeb43c371068742379d5e17f8c8b4ab478de48f91284e17b0e1b94e09d1a64713276326c7
-LibUnwind.v1.8.1+0.x86_64-unknown-freebsd.tar.gz/md5/6453d66204ba5fb941046afd85345b90
-LibUnwind.v1.8.1+0.x86_64-unknown-freebsd.tar.gz/sha512/77e67c3ddda5eaee0e8b127ad8e2ad41add4410e356c4e4b9bc46eb19871b91d006a59009d9948c4cc0951c2d9e956a99c946a60ba47ceb7f827b2897d6939e5
+LibUnwind.v1.8.1+1.aarch64-linux-gnu.tar.gz/md5/0f789b9e5b2604a39cc363c4c513a808
+LibUnwind.v1.8.1+1.aarch64-linux-gnu.tar.gz/sha512/4c9c8250bfd84a96135a5e9ecdd4500214996c39852609d3a3983c2c5de44a728d9ce6b71bd649c1725e186db077f74df93a99f07452a31d344c17315eedb33d
+LibUnwind.v1.8.1+1.aarch64-linux-musl.tar.gz/md5/356deb10e57d4c7e7bf7dbc728d6628d
+LibUnwind.v1.8.1+1.aarch64-linux-musl.tar.gz/sha512/a998eebe7a4928bd417620bef0de9728c080f5d9714f15314ac190b333efa1bd7a21207156d56c132515bd3f7154d60204f1fac2dac5468560a7017682527c78
+LibUnwind.v1.8.1+1.armv6l-linux-gnueabihf.tar.gz/md5/b0ff12f5f0c801e5e280a142a1b7a188
+LibUnwind.v1.8.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/68003f39eaf55c8742e821a228889590e8673cbafb74013a5b4f6a0c08ee372cb6b102a574e89ce9f46a38dd3d31ef75de95762f72a31a8ec9d7f495affaeb77
+LibUnwind.v1.8.1+1.armv6l-linux-musleabihf.tar.gz/md5/b04c77d707875989777ecfed66bd2dad
+LibUnwind.v1.8.1+1.armv6l-linux-musleabihf.tar.gz/sha512/fb20586a0cbc998a0482d4102d8b8e5b2f802af519e25c440a64f67554468b29c6999a9ec5509ba375714beb93a4b48e8dbf71e6089c25ecd63b11eead844041
+LibUnwind.v1.8.1+1.armv7l-linux-gnueabihf.tar.gz/md5/e948016b4179d34727b456bc768cd8e1
+LibUnwind.v1.8.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/6fc64e8ac7248540b95c321103d234f2c8633087f261e368251fe2cf6ea4e0654325716ac7017ae966edc4ddbb004a0f808d6e25cca766faaf505ca1f8f4aee7
+LibUnwind.v1.8.1+1.armv7l-linux-musleabihf.tar.gz/md5/660cf49c34a2ead1afbdcb44491e174a
+LibUnwind.v1.8.1+1.armv7l-linux-musleabihf.tar.gz/sha512/edf337d176440c210f5860e90771758335256fe9d2f179d506656bccf92a9f9aa478d176d4b0db2213945ae847dad5bb88265110c92cfcd538d5740858b6a3f0
+LibUnwind.v1.8.1+1.i686-linux-gnu.tar.gz/md5/7032a70cfecb88cdd49cc3a4879456c6
+LibUnwind.v1.8.1+1.i686-linux-gnu.tar.gz/sha512/e34acc8f270c5156ede3ac3377d0f428c672daed869570734351c6b5a8946d65b5c0c041b713dddefedef81e55c65f5683aed0fec0d366e2d0207d8b902b0e33
+LibUnwind.v1.8.1+1.i686-linux-musl.tar.gz/md5/0541c3419020334173d299cf3482ff85
+LibUnwind.v1.8.1+1.i686-linux-musl.tar.gz/sha512/0b57745d280fb9893772936cd4872b0e04f41d86379e772b889e75baffe9324ef8dd168bb4c9761c1b8372f387ce99721dd6086b1d52b9a91215f40e8113968d
+LibUnwind.v1.8.1+1.powerpc64le-linux-gnu.tar.gz/md5/fee37734fe95d1e96ebc77316df64192
+LibUnwind.v1.8.1+1.powerpc64le-linux-gnu.tar.gz/sha512/953ef70fb203db73764eeab0a37521b94e79ce70644ae16fe3157ca8d1011a0319d1928d094a3e2ed1e0489fdc0ca7dda33722095fd3aa40ed1fde150cf44c2a
+LibUnwind.v1.8.1+1.x86_64-linux-gnu.tar.gz/md5/bbb201e7455fd13b805b0a96dc16183b
+LibUnwind.v1.8.1+1.x86_64-linux-gnu.tar.gz/sha512/b1e21f7d772bd15bada17d287e1876ae586a97c6a8669e714347e7bf8a9b202fe53e8559cf19358f88bc458b2fe15ccbd616b64163cc715ce253f43f5133a8cd
+LibUnwind.v1.8.1+1.x86_64-linux-musl.tar.gz/md5/72156f9d6da9a2742d9152822e5525f5
+LibUnwind.v1.8.1+1.x86_64-linux-musl.tar.gz/sha512/53a3f1985c5ae4816693f292604810cbe948e6332aeb227fb900ba3730f4379e863b144ae87af2c0651c2b9633b35c45c7a0a6fa34958dc9f58e0f8baa2ea701
+LibUnwind.v1.8.1+1.x86_64-unknown-freebsd.tar.gz/md5/e4346df03246d847f2867df3ab5ac624
+LibUnwind.v1.8.1+1.x86_64-unknown-freebsd.tar.gz/sha512/ee01bc12726288ae091476c1bed44de224a9ef5355687fd6fd64742da6628450434d7f33d4daf81029263aa6d23549a0aa5c5ae656599c132051255d1d742d5d
 libunwind-1.8.1.tar.gz/md5/10c96118ff30b88c9eeb6eac8e75599d
 libunwind-1.8.1.tar.gz/sha512/aba7b578c1b8cbe78f05b64e154f3530525f8a34668b2a9f1ee6acb4b22c857befe34ad4e9e8cca99dbb66689d41bc72060a8f191bd8be232725d342809431b3
diff --git a/deps/clang.version b/deps/clang.version
index 76ddb503b3c8c..fcd55b72de5ff 100644
--- a/deps/clang.version
+++ b/deps/clang.version
@@ -3,4 +3,4 @@
 ## jll artifact
 # Clang (paired with LLVM, only here as a JLL download)
 CLANG_JLL_NAME := Clang
-CLANG_JLL_VER  := 17.0.6+4
+CLANG_JLL_VER  := 18.1.7+2
diff --git a/deps/curl.mk b/deps/curl.mk
index 444334b581fed..ae2830c3cd4f2 100644
--- a/deps/curl.mk
+++ b/deps/curl.mk
@@ -37,7 +37,7 @@ checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
 # Disable....almost everything
 CURL_CONFIGURE_FLAGS := $(CONFIGURE_COMMON) \
 	--without-gnutls --without-libidn2 --without-librtmp \
-	--without-nss --without-libpsl --without-libgsasl --without-fish-functions-dir \
+	--without-libpsl --without-libgsasl --without-fish-functions-dir \
 	--disable-ares --disable-manual --disable-ldap --disable-ldaps --disable-static \
 	--without-gssapi --without-brotli
 # A few things we actually enable
@@ -57,7 +57,15 @@ CURL_TLS_CONFIGURE_FLAGS := --with-mbedtls=$(build_prefix)
 endif
 CURL_CONFIGURE_FLAGS += $(CURL_TLS_CONFIGURE_FLAGS)
 
-$(BUILDDIR)/curl-$(CURL_VER)/build-configured: $(SRCCACHE)/curl-$(CURL_VER)/source-extracted
+$(SRCCACHE)/curl-$(CURL_VER)/curl-8.6.0-build.patch-applied: $(SRCCACHE)/curl-$(CURL_VER)/source-extracted
+	cd $(dir $@) && \
+		patch -p1 -f < $(SRCDIR)/patches/curl-8.6.0-build.patch
+	echo 1 > $@
+
+$(SRCCACHE)/curl-$(CURL_VER)/source-patched: $(SRCCACHE)/curl-$(CURL_VER)/curl-8.6.0-build.patch-applied
+	echo 1 > $@
+
+$(BUILDDIR)/curl-$(CURL_VER)/build-configured: $(SRCCACHE)/curl-$(CURL_VER)/source-patched
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 	$(dir $<)/configure $(CURL_CONFIGURE_FLAGS) \
diff --git a/deps/ittapi.mk b/deps/ittapi.mk
index 505b7c8c28c64..b62b981a34ddb 100644
--- a/deps/ittapi.mk
+++ b/deps/ittapi.mk
@@ -14,7 +14,7 @@ $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured: $(SRCCACHE)/$(ITTAPI_SRC_DIR)/so
 	echo 1 > $@
 
 $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ITTAPI_SRC_DIR)/build-configured
-	$(CMAKE) --build $(dir $<)
+	$(MAKE) -C $(dir $<)
 	echo 1 > $@
 
 define ITTAPI_INSTALL
diff --git a/deps/libgit2.mk b/deps/libgit2.mk
index 162ad9fc513dd..b65ac022885a3 100644
--- a/deps/libgit2.mk
+++ b/deps/libgit2.mk
@@ -51,12 +51,12 @@ $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/source-extr
 	echo 1 > $@
 
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured
-	$(CMAKE) --build $(dir $<)
+	$(MAKE) -C $(dir $<)
 	echo 1 > $@
 
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-checked: $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-compiled
 ifeq ($(OS),$(BUILD_OS))
-	$(CMAKE) --build $(dir $@) test
+	$(MAKE) -C $(dir $@) test
 endif
 	echo 1 > $@
 
diff --git a/deps/libssh2.mk b/deps/libssh2.mk
index 54d40dabbce1e..c293d8309d2bc 100644
--- a/deps/libssh2.mk
+++ b/deps/libssh2.mk
@@ -49,12 +49,12 @@ $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(LIBSSH2_SRC_PATH)/source-extr
 	echo 1 > $@
 
 $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured
-	$(CMAKE) --build $(dir $<)
+	$(MAKE) -C $(dir $<)
 	echo 1 > $@
 
 $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-checked: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled
 ifeq ($(OS),$(BUILD_OS))
-	$(CMAKE) --build $(dir $@) test
+	$(MAKE) -C $(dir $@) test
 endif
 	echo 1 > $@
 
diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk
index c014686b1866e..85b2c23473a18 100644
--- a/deps/libsuitesparse.mk
+++ b/deps/libsuitesparse.mk
@@ -59,8 +59,8 @@ $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: | $(build_prefix)/
 
 $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-patched
 	cd $(dir $<) && $(CMAKE) . $(LIBSUITESPARSE_CMAKE_FLAGS)
-	$(CMAKE) --build $(dir $<)
-	$(CMAKE) --install $(dir $<)
+	$(MAKE) -C $(dir $<)
+	$(MAKE) -C $(dir $<) install
 	echo 1 > $@
 
 ifeq ($(OS),WINNT)
@@ -70,7 +70,7 @@ LIBSUITESPARSE_SHLIB_ENV:=LD_LIBRARY_PATH="$(build_shlibdir)"
 endif
 $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
 	for PROJ in $(shell echo $(subst ;, ,$(LIBSUITESPARSE_PROJECTS))); do \
-		$(LIBSUITESPARSE_SHLIB_ENV) $(CMAKE) --build $(dir $<)$${PROJ} default $(LIBSUITESPARSE_MFLAGS) || exit 1; \
+		$(LIBSUITESPARSE_SHLIB_ENV) $(MAKE) -C $(dir $<)$${PROJ} default $(LIBSUITESPARSE_MFLAGS) || exit 1; \
 	done
 	echo 1 > $@
 
@@ -113,9 +113,7 @@ uninstall-libsuitesparse:
 endef
 
 remove-libsuitesparse-gpl-lib:
-ifeq ($(USE_GPL_LIBS),1)
-	@echo This build contains [GPL-2.0+] libs:  libcholmod librbio libspqr libumfpack
-else
+ifeq ($(USE_GPL_LIBS),0)
 	@echo Removing GPL libs...
 	-rm -f $(build_bindir)/libcholmod*
 	-rm -f $(build_bindir)/libklu_cholmod*
diff --git a/deps/libsuitesparse.version b/deps/libsuitesparse.version
index 3131908a4a298..6f841190cebc7 100644
--- a/deps/libsuitesparse.version
+++ b/deps/libsuitesparse.version
@@ -4,5 +4,5 @@
 LIBSUITESPARSE_JLL_NAME := SuiteSparse
 
 ## source build
-LIBSUITESPARSE_VER := 7.7.0
-LIBSUITESPARSE_SHA1=13806726cbf470914d012d132a85aea1aff9ee77
+LIBSUITESPARSE_VER := 7.8.0
+LIBSUITESPARSE_SHA1=58e6558408f6a51c08e35a5557d5e68cae32147e
diff --git a/deps/libtracyclient.mk b/deps/libtracyclient.mk
index 814f336584e98..92d6bee4caea6 100644
--- a/deps/libtracyclient.mk
+++ b/deps/libtracyclient.mk
@@ -55,7 +55,9 @@ $(LIBTRACYCLIENT_BUILDDIR)/build-configured: $(LIBTRACYCLIENT_BUILDDIR)/libTracy
 
 $(LIBTRACYCLIENT_BUILDDIR)/build-compiled: $(LIBTRACYCLIENT_BUILDDIR)/build-configured
 	cd $(LIBTRACYCLIENT_BUILDDIR) && \
-		$(CMAKE) --build .
+		$(if $(filter $(CMAKE_GENERATOR),make), \
+		  $(MAKE), \
+		  $(CMAKE) --build .)
 	echo 1 > $@
 
 $(eval $(call staged-install, \
diff --git a/deps/libuv.version b/deps/libuv.version
index bc8e2e57c9517..ebfc63927d9db 100644
--- a/deps/libuv.version
+++ b/deps/libuv.version
@@ -1,7 +1,9 @@
+# -*- makefile -*-
+
 ## jll artifact
 LIBUV_JLL_NAME := LibUV
 
 ## source build
 LIBUV_VER := 2
 LIBUV_BRANCH=julia-uv2-1.48.0
-LIBUV_SHA1=ca3a5a431a1c37859b6508e6b2a288092337029a
+LIBUV_SHA1=af4172ec713ee986ba1a989b9e33993a07c60c9e
diff --git a/deps/lld.version b/deps/lld.version
index 431c1b7a75032..3ca9960164e27 100644
--- a/deps/lld.version
+++ b/deps/lld.version
@@ -2,4 +2,4 @@
 
 ## jll artifact
 LLD_JLL_NAME := LLD
-LLD_JLL_VER := 17.0.6+4
+LLD_JLL_VER := 18.1.7+2
diff --git a/deps/llvm-tools.version b/deps/llvm-tools.version
index 3609c54ddc98f..1fcc8944dc769 100644
--- a/deps/llvm-tools.version
+++ b/deps/llvm-tools.version
@@ -3,5 +3,5 @@
 ## jll artifact
 # LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`)
 LLVM_TOOLS_JLL_NAME := LLVM
-LLVM_TOOLS_JLL_VER := 17.0.6+4
-LLVM_TOOLS_ASSERT_JLL_VER := 17.0.6+4
+LLVM_TOOLS_JLL_VER := 18.1.7+2
+LLVM_TOOLS_ASSERT_JLL_VER := 18.1.7+2
diff --git a/deps/llvm.mk b/deps/llvm.mk
index 592736fe4a149..3f4bc3e6746f0 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -86,22 +86,23 @@ endif
 LLVM_CMAKE += -DLLVM_WINDOWS_PREFER_FORWARD_SLASH=False
 
 # Allow adding LLVM specific flags
-LLVM_CFLAGS += $(CFLAGS)
-LLVM_CXXFLAGS += $(CXXFLAGS)
+LLVM_CFLAGS += $(CFLAGS) $(BOLT_CFLAGS)
+LLVM_CXXFLAGS += $(CXXFLAGS) $(BOLT_CFLAGS)
 LLVM_CPPFLAGS += $(CPPFLAGS)
 LLVM_LDFLAGS += $(LDFLAGS)
+LLVM_LDFLAGS += $(BOLT_LDFLAGS)
 LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING="$(LLVM_TARGETS)" -DCMAKE_BUILD_TYPE="$(LLVM_CMAKE_BUILDTYPE)"
 LLVM_CMAKE += -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="$(LLVM_EXPERIMENTAL_TARGETS)"
 LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
-LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=ON -DZLIB_LIBRARY="$(build_prefix)/lib"
-LLVM_CMAKE += -DCOMPILER_RT_ENABLE_IOS=OFF -DCOMPILER_RT_ENABLE_WATCHOS=OFF -DCOMPILER_RT_ENABLE_TVOS=OFF
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=FORCE_ON -DZLIB_ROOT="$(build_prefix)"
+LLVM_CMAKE += -DLLVM_ENABLE_ZSTD=OFF
 ifeq ($(USE_POLLY_ACC),1)
 LLVM_CMAKE += -DPOLLY_ENABLE_GPGPU_CODEGEN=ON
 endif
 LLVM_CMAKE += -DLLVM_TOOLS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
 LLVM_CMAKE += -DLLVM_UTILS_INSTALL_DIR=$(call rel_path,$(build_prefix),$(build_depsbindir))
 LLVM_CMAKE += -DLLVM_INCLUDE_UTILS=ON -DLLVM_INSTALL_UTILS=ON
-LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_HISTEDIT_H=Off -DHAVE_LIBEDIT=Off
+LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_ENABLE_BINDINGS=OFF -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERMINFO=Off -DHAVE_LIBEDIT=Off -DLLVM_ENABLE_LIBEDIT=OFF
 ifeq ($(LLVM_ASSERTIONS), 1)
 LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON
 endif # LLVM_ASSERTIONS
@@ -268,7 +269,9 @@ $(LLVM_BUILDDIR_withtype)/build-configured: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-e
 
 $(LLVM_BUILDDIR_withtype)/build-compiled: $(LLVM_BUILDDIR_withtype)/build-configured
 	cd $(LLVM_BUILDDIR_withtype) && \
-		$(CMAKE) --build .
+		$(if $(filter $(CMAKE_GENERATOR),make), \
+		  $(MAKE), \
+		  $(CMAKE) --build .)
 	echo 1 > $@
 
 $(LLVM_BUILDDIR_withtype)/build-checked: $(LLVM_BUILDDIR_withtype)/build-compiled
@@ -289,6 +292,9 @@ ifeq ($(OS),Darwin)
 # https://github.com/JuliaLang/julia/issues/29981
 LLVM_INSTALL += && ln -s libLLVM.dylib $2$$(build_shlibdir)/libLLVM-$$(LLVM_VER_SHORT).dylib
 endif
+ifeq ($(BUILD_LLD), 1)
+LLVM_INSTALL += && cp $2$$(build_bindir)/lld$$(EXE) $2$$(build_depsbindir)
+endif
 
 $(eval $(call staged-install, \
 	llvm,$$(LLVM_SRC_DIR)/build_$$(LLVM_BUILDTYPE), \
diff --git a/deps/llvm.version b/deps/llvm.version
index c02a52008fe25..8e4180ef5a277 100644
--- a/deps/llvm.version
+++ b/deps/llvm.version
@@ -2,14 +2,14 @@
 
 ## jll artifact
 LLVM_JLL_NAME := libLLVM
-LLVM_ASSERT_JLL_VER := 17.0.6+4
+LLVM_ASSERT_JLL_VER := 18.1.7+2
 ## source build
 # Version number of LLVM
-LLVM_VER := 17.0.6
+LLVM_VER := 18.1.7
 # Git branch name in `LLVM_GIT_URL` repository
-LLVM_BRANCH=julia-17.0.6-4
+LLVM_BRANCH=julia-18.1.7-2
 # Git ref in `LLVM_GIT_URL` repository
-LLVM_SHA1=julia-17.0.6-4
+LLVM_SHA1=julia-18.1.7-2
 
 ## Following options are used to automatically fetch patchset from Julia's fork.  This is
 ## useful if you want to build an external LLVM while still applying Julia's patches.
@@ -18,6 +18,6 @@ LLVM_APPLY_JULIA_PATCHES := 0
 # GitHub repository to use for fetching the Julia patches to apply to LLVM source code.
 LLVM_JULIA_DIFF_GITHUB_REPO := https://github.com/llvm/llvm-project
 # Base GitHub ref for generating the diff.
-LLVM_BASE_REF := llvm:llvmorg-17.0.6
+LLVM_BASE_REF := llvm:llvmorg-18.1.7
 # Julia fork's GitHub ref for generating the diff.
-LLVM_JULIA_REF := JuliaLang:julia-17.0.6-4
+LLVM_JULIA_REF := JuliaLang:julia-18.1.7-2
diff --git a/deps/llvmunwind.version b/deps/llvmunwind.version
index 7d13af9a158f7..9c2a91c566ba2 100644
--- a/deps/llvmunwind.version
+++ b/deps/llvmunwind.version
@@ -2,4 +2,4 @@
 LLVMUNWIND_JLL_NAME := LLVMLibUnwind
 
 ## source build
-LLVMUNWIND_VER := 12.0.1
+LLVMUNWIND_VER := 14.0.6
diff --git a/deps/mbedtls.mk b/deps/mbedtls.mk
index 79f6dadf1d383..39cf817d70658 100644
--- a/deps/mbedtls.mk
+++ b/deps/mbedtls.mk
@@ -39,12 +39,12 @@ $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured: $(SRCCACHE)/$(MBEDTLS_SRC)/source-e
 	echo 1 > $@
 
 $(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled: $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured
-	$(CMAKE) --build $(dir $<)
+	$(MAKE) -C $(dir $<)
 	echo 1 > $@
 
 $(BUILDDIR)/$(MBEDTLS_SRC)/build-checked: $(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled
 ifeq ($(OS),$(BUILD_OS))
-	$(CMAKE) --build $(dir $@) test
+	$(MAKE) -C $(dir $@) test
 endif
 	echo 1 > $@
 
diff --git a/deps/nvtx.mk b/deps/nvtx.mk
new file mode 100644
index 0000000000000..c4d4db2deba65
--- /dev/null
+++ b/deps/nvtx.mk
@@ -0,0 +1,31 @@
+## nvtx ##
+include $(SRCDIR)/nvtx.version
+
+NVTX_GIT_URL := https://github.com/NVIDIA/NVTX.git
+NVTX_TAR_URL = https://api.github.com/repos/NVIDIA/NVTX/tarball/$1
+$(eval $(call git-external,nvtx,NVTX,,,$(SRCCACHE)))
+
+$(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	echo 1 > $@
+
+$(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured
+	echo 1 > $@
+
+define NVTX_INSTALL
+	cp -a $(SRCCACHE)/$(NVTX_SRC_DIR)/c/include $2/$$(build_includedir)/
+endef
+
+$(eval $(call staged-install, \
+	nvtx,$(NVTX_SRC_DIR), \
+	NVTX_INSTALL,,,))
+
+get-nvtx: $(NVTX_SRC_FILE)
+extract-nvtx: $(SRCCACHE)/$(NVTX_SRC_DIR)/source-extracted
+configure-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-configured
+compile-nvtx: $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled
+fastcheck-nvtx: #none
+check-nvtx: #none
+
+clean-nvtx:
+	-rm -f $(BUILDDIR)/$(NVTX_SRC_DIR)/build-compiled
diff --git a/deps/nvtx.version b/deps/nvtx.version
new file mode 100644
index 0000000000000..e26c55cae095e
--- /dev/null
+++ b/deps/nvtx.version
@@ -0,0 +1,4 @@
+# -*- makefile -*-
+## source build
+NVTX_BRANCH=dev
+NVTX_SHA1=733fb419540bc1d152bc682d2ca066c7bb79da29
diff --git a/deps/openblas.mk b/deps/openblas.mk
index 1bc068d2859d9..affd1c7a7aa55 100644
--- a/deps/openblas.mk
+++ b/deps/openblas.mk
@@ -90,7 +90,12 @@ $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied: $(BUILDDIR)/$(OP
 		patch -p1 -f < $(SRCDIR)/patches/openblas-winexit.patch
 	echo 1 > $@
 
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-memory-buffer-multi-threading.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied
+	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/openblas-memory-buffer-multi-threading.patch
+	echo 1 > $@
+
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-memory-buffer-multi-threading.patch-applied
 	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
 		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
 	echo 1 > $@
diff --git a/deps/openblas.version b/deps/openblas.version
index 527764e3f8603..09dcdc45af1ef 100644
--- a/deps/openblas.version
+++ b/deps/openblas.version
@@ -3,9 +3,9 @@
 OPENBLAS_JLL_NAME := OpenBLAS
 
 ## source build
-OPENBLAS_VER := 0.3.27
-OPENBLAS_BRANCH=v0.3.27
-OPENBLAS_SHA1=6c77e5e314474773a7749357b153caba4ec3817d
+OPENBLAS_VER := 0.3.28
+OPENBLAS_BRANCH=v0.3.28
+OPENBLAS_SHA1=5ef8b1964658f9cb6a6324a06f6a1a022609b0c5
 
 # LAPACK, source-only
 LAPACK_VER := 3.9.0
diff --git a/deps/patchelf.version b/deps/patchelf.version
index 9038338d45faf..6e4f32a0c2fe4 100644
--- a/deps/patchelf.version
+++ b/deps/patchelf.version
@@ -1,3 +1,4 @@
 ## source build
 # Patchelf (we don't ship this or even use a JLL, we just always build it)
-PATCHELF_VER := 0.18.0
+# NOTE: Do not upgrade this to 0.18+ until https://github.com/NixOS/patchelf/issues/492 is fixed
+PATCHELF_VER := 0.17.2
diff --git a/deps/patches/curl-8.6.0-build.patch b/deps/patches/curl-8.6.0-build.patch
new file mode 100644
index 0000000000000..827b02808d505
--- /dev/null
+++ b/deps/patches/curl-8.6.0-build.patch
@@ -0,0 +1,23 @@
+From 5cc2b016c36aaf5a08e2feb7c068fca5bb0a8052 Mon Sep 17 00:00:00 2001
+From: Daniel Stenberg <daniel@haxx.se>
+Date: Mon, 5 Feb 2024 15:22:08 +0100
+Subject: [PATCH] md4: include strdup.h for the memdup proto
+
+Reported-by: Erik Schnetter
+Fixes #12849
+Closes #12863
+---
+ lib/md4.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/lib/md4.c b/lib/md4.c
+index 067c211e420afd..58dd1166cf924f 100644
+--- a/lib/md4.c
++++ b/lib/md4.c
+@@ -28,6 +28,7 @@
+
+ #include <string.h>
+
++#include "strdup.h"
+ #include "curl_md4.h"
+ #include "warnless.h"
diff --git a/deps/patches/libunwind-disable-initial-exec-tls.patch b/deps/patches/libunwind-disable-initial-exec-tls.patch
new file mode 100644
index 0000000000000..c6718ac2db98f
--- /dev/null
+++ b/deps/patches/libunwind-disable-initial-exec-tls.patch
@@ -0,0 +1,44 @@
+diff --git a/include/libunwind-common.h.in b/include/libunwind-common.h.in
+index 893fdd69..80ab9648 100644
+--- a/include/libunwind-common.h.in
++++ b/include/libunwind-common.h.in
+@@ -340,5 +340,6 @@ extern int unw_get_elf_filename_by_ip (unw_addr_space_t, unw_word_t, char *,
+ extern const char *unw_strerror (int);
+ extern int unw_backtrace (void **, int);
+ extern int unw_backtrace2 (void **, int, unw_context_t*, int);
++extern int unw_ensure_tls (void);
+ 
+ extern unw_addr_space_t unw_local_addr_space;
+diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
+index 7a5d7e1f..8453ffb0 100644
+--- a/src/dwarf/Gparser.c
++++ b/src/dwarf/Gparser.c
+@@ -623,7 +623,7 @@ get_rs_cache (unw_addr_space_t as, intrmask_t *saved_maskp)
+ #if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD
+   if (likely (caching == UNW_CACHE_PER_THREAD))
+     {
+-      static _Thread_local struct dwarf_rs_cache tls_cache __attribute__((tls_model("initial-exec")));
++      static _Thread_local struct dwarf_rs_cache tls_cache;
+       Debug (16, "using TLS cache\n");
+       cache = &tls_cache;
+     }
+diff --git a/src/mi/init.c b/src/mi/init.c
+index e4431eeb..07cae852 100644
+--- a/src/mi/init.c
++++ b/src/mi/init.c
+@@ -82,3 +82,15 @@ mi_init (void)
+   unw_init_page_size();
+   assert(sizeof(struct cursor) <= sizeof(unw_cursor_t));
+ }
++
++int
++unw_ensure_tls (void)
++{
++#if defined(HAVE___CACHE_PER_THREAD) && HAVE___CACHE_PER_THREAD
++  static _Thread_local int alloc_trigger;
++  alloc_trigger = 1;
++  return alloc_trigger;
++#else
++  return 0;
++#endif
++}
diff --git a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch b/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch
deleted file mode 100644
index 4e3897dfb9801..0000000000000
--- a/deps/patches/llvm-libunwind-revert-monorepo-requirement.patch
+++ /dev/null
@@ -1,156 +0,0 @@
-Upstream commit 8c03fdf34a659925a3f09c8f54016e47ea1c7519 changed the build such
-that it requires living inside the monorepo with libcxx available, only so that
-it can reuse a CMake file to simplify some build steps. This patch is a revert
-of that commit applied only to libunwind.
-
----
-diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
-index 570b8db90653..a383d7d77d6f 100644
---- a/libunwind/CMakeLists.txt
-+++ b/libunwind/CMakeLists.txt
-@@ -1,7 +1,3 @@
--if (NOT IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/../libcxx")
--  message(FATAL_ERROR "libunwind requires being built in a monorepo layout with libcxx available")
--endif()
--
- #===============================================================================
- # Setup Project
- #===============================================================================
-@@ -15,31 +11,103 @@ set(CMAKE_MODULE_PATH
-   ${CMAKE_MODULE_PATH}
-   )
- 
--set(LIBUNWIND_SOURCE_DIR  ${CMAKE_CURRENT_SOURCE_DIR})
--set(LIBUNWIND_BINARY_DIR  ${CMAKE_CURRENT_BINARY_DIR})
--set(LIBUNWIND_LIBCXX_PATH "${CMAKE_CURRENT_LIST_DIR}/../libcxx" CACHE PATH
--        "Specify path to libc++ source.")
--
- if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_BUILD)
-   project(libunwind LANGUAGES C CXX ASM)
- 
-+  # Rely on llvm-config.
-+  set(CONFIG_OUTPUT)
-+  if(NOT LLVM_CONFIG_PATH)
-+    find_program(LLVM_CONFIG_PATH "llvm-config")
-+  endif()
-+  if (DEFINED LLVM_PATH)
-+    set(LLVM_INCLUDE_DIR ${LLVM_INCLUDE_DIR} CACHE PATH "Path to llvm/include")
-+    set(LLVM_PATH ${LLVM_PATH} CACHE PATH "Path to LLVM source tree")
-+    set(LLVM_MAIN_SRC_DIR ${LLVM_PATH})
-+    set(LLVM_CMAKE_PATH "${LLVM_PATH}/cmake/modules")
-+  elseif(LLVM_CONFIG_PATH)
-+    message(STATUS "Found LLVM_CONFIG_PATH as ${LLVM_CONFIG_PATH}")
-+    set(CONFIG_COMMAND ${LLVM_CONFIG_PATH} "--includedir" "--prefix" "--src-root")
-+    execute_process(COMMAND ${CONFIG_COMMAND}
-+                    RESULT_VARIABLE HAD_ERROR
-+                    OUTPUT_VARIABLE CONFIG_OUTPUT)
-+    if (NOT HAD_ERROR)
-+      string(REGEX REPLACE "[ \t]*[\r\n]+[ \t]*" ";"
-+             CONFIG_OUTPUT ${CONFIG_OUTPUT})
-+    else()
-+      string(REPLACE ";" " " CONFIG_COMMAND_STR "${CONFIG_COMMAND}")
-+      message(STATUS "${CONFIG_COMMAND_STR}")
-+      message(FATAL_ERROR "llvm-config failed with status ${HAD_ERROR}")
-+    endif()
-+
-+    list(GET CONFIG_OUTPUT 0 INCLUDE_DIR)
-+    list(GET CONFIG_OUTPUT 1 LLVM_OBJ_ROOT)
-+    list(GET CONFIG_OUTPUT 2 MAIN_SRC_DIR)
-+
-+    set(LLVM_INCLUDE_DIR ${INCLUDE_DIR} CACHE PATH "Path to llvm/include")
-+    set(LLVM_BINARY_DIR ${LLVM_OBJ_ROOT} CACHE PATH "Path to LLVM build tree")
-+    set(LLVM_MAIN_SRC_DIR ${MAIN_SRC_DIR} CACHE PATH "Path to LLVM source tree")
-+    set(LLVM_LIT_PATH "${LLVM_PATH}/utils/lit/lit.py")
-+
-+    # --cmakedir is supported since llvm r291218 (4.0 release)
-+    execute_process(
-+      COMMAND ${LLVM_CONFIG_PATH} --cmakedir
-+      RESULT_VARIABLE HAD_ERROR
-+      OUTPUT_VARIABLE CONFIG_OUTPUT
-+      ERROR_QUIET)
-+    if(NOT HAD_ERROR)
-+      string(STRIP "${CONFIG_OUTPUT}" LLVM_CMAKE_PATH_FROM_LLVM_CONFIG)
-+      file(TO_CMAKE_PATH "${LLVM_CMAKE_PATH_FROM_LLVM_CONFIG}" LLVM_CMAKE_PATH)
-+    else()
-+      file(TO_CMAKE_PATH "${LLVM_BINARY_DIR}" LLVM_BINARY_DIR_CMAKE_STYLE)
-+      set(LLVM_CMAKE_PATH "${LLVM_BINARY_DIR_CMAKE_STYLE}/lib${LLVM_LIBDIR_SUFFIX}/cmake/llvm")
-+    endif()
-+  else()
-+    message(WARNING "UNSUPPORTED LIBUNWIND CONFIGURATION DETECTED: "
-+                    "llvm-config not found and LLVM_MAIN_SRC_DIR not defined. "
-+                    "Reconfigure with -DLLVM_CONFIG=path/to/llvm-config "
-+                    "or -DLLVM_PATH=path/to/llvm-source-root.")
-+  endif()
-+
-+  if (EXISTS ${LLVM_CMAKE_PATH})
-+    list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_PATH}")
-+    include("${LLVM_CMAKE_PATH}/AddLLVM.cmake")
-+    include("${LLVM_CMAKE_PATH}/HandleLLVMOptions.cmake")
-+  else()
-+    message(WARNING "Not found: ${LLVM_CMAKE_PATH}")
-+  endif()
-+
-   set(PACKAGE_NAME libunwind)
-   set(PACKAGE_VERSION 12.0.1)
-   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
-   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
- 
--  # Add the CMake module path of libcxx so we can reuse HandleOutOfTreeLLVM.cmake
--  set(LIBUNWIND_LIBCXX_CMAKE_PATH "${LIBUNWIND_LIBCXX_PATH}/cmake/Modules")
--  list(APPEND CMAKE_MODULE_PATH "${LIBUNWIND_LIBCXX_CMAKE_PATH}")
-+  if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
-+    set(LLVM_LIT ${LLVM_MAIN_SRC_DIR}/utils/lit/lit.py)
-+  else()
-+    # Seek installed Lit.
-+    find_program(LLVM_LIT "lit.py" ${LLVM_MAIN_SRC_DIR}/utils/lit
-+                 DOC "Path to lit.py")
-+  endif()
- 
--  # In a standalone build, we don't have llvm to automatically generate the
--  # llvm-lit script for us.  So we need to provide an explicit directory that
--  # the configurator should write the script into.
--  set(LIBUNWIND_STANDALONE_BUILD 1)
--  set(LLVM_LIT_OUTPUT_DIR "${LIBUNWIND_BINARY_DIR}/bin")
-+  if (LLVM_LIT)
-+    # Define the default arguments to use with 'lit', and an option for the user
-+    # to override.
-+    set(LIT_ARGS_DEFAULT "-sv")
-+    if (MSVC OR XCODE)
-+      set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
-+    endif()
-+    set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
-+
-+    # On Win32 hosts, provide an option to specify the path to the GnuWin32 tools.
-+    if (WIN32 AND NOT CYGWIN)
-+      set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
-+    endif()
-+  else()
-+    set(LLVM_INCLUDE_TESTS OFF)
-+  endif()
- 
--  # Find the LLVM sources and simulate LLVM CMake options.
--  include(HandleOutOfTreeLLVM)
-+  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
-+  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
- else()
-   set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py")
- endif()
-@@ -85,8 +153,6 @@ set(LIBUNWIND_TEST_COMPILER_FLAGS "" CACHE STRING
-     "Additional compiler flags for test programs.")
- set(LIBUNWIND_TEST_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test/lit.site.cfg.in" CACHE STRING
-     "The Lit testing configuration to use when running the tests.")
--set(LIBUNWIND_TEST_PARAMS "" CACHE STRING
--    "A list of parameters to run the Lit test suite with.")
- 
- if (NOT LIBUNWIND_ENABLE_SHARED AND NOT LIBUNWIND_ENABLE_STATIC)
-   message(FATAL_ERROR "libunwind must be built as either a shared or static library.")
-@@ -113,6 +179,9 @@ set(CMAKE_MODULE_PATH
-     "${CMAKE_CURRENT_SOURCE_DIR}/cmake"
-     ${CMAKE_MODULE_PATH})
- 
-+set(LIBUNWIND_SOURCE_DIR  ${CMAKE_CURRENT_SOURCE_DIR})
-+set(LIBUNWIND_BINARY_DIR  ${CMAKE_CURRENT_BINARY_DIR})
-+
- if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
-   set(LIBUNWIND_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++)
-   set(LIBUNWIND_INSTALL_LIBRARY_DIR lib${LLVM_LIBDIR_SUFFIX}/${LLVM_DEFAULT_TARGET_TRIPLE}/c++)
diff --git a/deps/patches/openblas-memory-buffer-multi-threading.patch b/deps/patches/openblas-memory-buffer-multi-threading.patch
new file mode 100644
index 0000000000000..9693b5cf61597
--- /dev/null
+++ b/deps/patches/openblas-memory-buffer-multi-threading.patch
@@ -0,0 +1,49 @@
+From 23b5d66a86417a071bba9a96a0573192237981b6 Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Wed, 14 Aug 2024 10:35:44 +0200
+Subject: [PATCH 1/2] Ensure a memory buffer has been allocated for each thread
+ before invoking it
+
+---
+ driver/others/blas_server.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+From d24b3cf39392a99e81ed47a5f093fbd074d4b39b Mon Sep 17 00:00:00 2001
+From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
+Date: Thu, 15 Aug 2024 15:32:58 +0200
+Subject: [PATCH 2/2] properly fix buffer allocation and assignment
+
+---
+ driver/others/blas_server.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c
+index 765511d8c7..b9a7674c17 100644
+--- a/driver/others/blas_server.c
++++ b/driver/others/blas_server.c
+@@ -1076,6 +1076,8 @@ fprintf(STDERR, "Server[%2ld] Calculation started.  Mode = 0x%03x M = %3ld N=%3l
+       main_status[cpu] = MAIN_RUNNING1;
+ #endif
+ 
++if (buffer == NULL) blas_thread_buffer[cpu] = blas_memory_alloc(2);
++	
+ //For target LOONGSON3R5, applying an offset to the buffer is essential
+ //for minimizing cache conflicts and optimizing performance.
+ #if defined(ARCH_LOONGARCH64) && !defined(NO_AFFINITY)
+
+diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c
+index b9a7674c17..29f8a5e646 100644
+--- a/driver/others/blas_server.c
++++ b/driver/others/blas_server.c
+@@ -1076,7 +1076,11 @@ fprintf(STDERR, "Server[%2ld] Calculation started.  Mode = 0x%03x M = %3ld N=%3l
+       main_status[cpu] = MAIN_RUNNING1;
+ #endif
+ 
+-if (buffer == NULL) blas_thread_buffer[cpu] = blas_memory_alloc(2);
++if (buffer == NULL) {
++	blas_thread_buffer[cpu] = blas_memory_alloc(2);
++	buffer = blas_thread_buffer[cpu];
++}      
++
+ 	
+ //For target LOONGSON3R5, applying an offset to the buffer is essential
+ //for minimizing cache conflicts and optimizing performance.
diff --git a/deps/terminfo.mk b/deps/terminfo.mk
new file mode 100644
index 0000000000000..60865838a813e
--- /dev/null
+++ b/deps/terminfo.mk
@@ -0,0 +1,43 @@
+## TERMINFO-DB ##
+include $(SRCDIR)/terminfo.version
+
+$(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/JuliaBinaryWrappers/TermInfoDB_jll.jl/releases/download/$(TERMINFO_TAG)/TermInfoDB.v$(TERMINFO_VER).any.tar.gz
+	touch -c $@
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+	$(JLCHECKSUM) $<
+	rm -rf $(dir $@)
+	mkdir -p $(dir $@)
+	$(TAR) -C $(dir $@) --strip-components 1 -xf $<
+	echo 1 > $@
+
+checksum-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+	$(JLCHECKSUM) $<
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted
+	echo 1 > $@
+
+$(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+	echo 1 > $@
+
+define TERMINFO_INSTALL
+	mkdir -p $2/$$(build_datarootdir)/julia
+	cp -R $1/terminfo $2/$$(build_datarootdir)/julia/
+endef
+$(eval $(call staged-install, \
+	terminfo,TermInfoDB-v$(TERMINFO_VER), \
+	TERMINFO_INSTALL,,,,))
+
+clean-terminfo:
+	-rm -f $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+
+distclean-terminfo:
+	rm -rf $(SRCCACHE)/TermInfoDB*.tar.gz $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER) $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)
+
+get-terminfo: $(SRCCACHE)/TermInfoDB-v$(TERMINFO_VER).any.tar.gz
+extract-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/source-extracted
+configure-terminfo: extract-terminfo
+compile-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-compiled
+fastcheck-terminfo: check-terminfo
+check-terminfo: $(BUILDDIR)/TermInfoDB-v$(TERMINFO_VER)/build-checked
diff --git a/deps/terminfo.version b/deps/terminfo.version
new file mode 100644
index 0000000000000..b7c020b830517
--- /dev/null
+++ b/deps/terminfo.version
@@ -0,0 +1,3 @@
+# -*- makefile -*-
+TERMINFO_VER := 2023.12.9
+TERMINFO_TAG := TermInfoDB-v$(TERMINFO_VER)+0
diff --git a/deps/unwind.mk b/deps/unwind.mk
index 0bfaca574f5e6..3951bbf36e22f 100644
--- a/deps/unwind.mk
+++ b/deps/unwind.mk
@@ -38,13 +38,17 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-aarch64-inline-asm.patch-applied:
 	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-aarch64-inline-asm.patch
 	echo 1 > $@
 
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-aarch64-inline-asm.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-disable-initial-exec-tls.patch
+	echo 1 > $@
+
 # note minidebuginfo requires liblzma, which we do not have a source build for
 # (it will be enabled in BinaryBuilder-based downloads however)
 # since https://github.com/JuliaPackaging/Yggdrasil/commit/0149e021be9badcb331007c62442a4f554f3003c
-$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-aarch64-inline-asm.patch-applied
+$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-disable-initial-exec-tls.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks
+	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo --disable-conservative-checks --enable-per-thread-cache
 	echo 1 > $@
 
 $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured
@@ -84,50 +88,51 @@ check-unwind: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-checked
 LLVMUNWIND_OPTS := $(CMAKE_COMMON) \
 	-DCMAKE_BUILD_TYPE=MinSizeRel \
 	-DLIBUNWIND_ENABLE_PEDANTIC=OFF \
-	-DLLVM_PATH=$(SRCCACHE)/$(LLVM_SRC_DIR)/llvm
+	-DLIBUNWIND_INCLUDE_DOCS=OFF \
+	-DLIBUNWIND_INCLUDE_TESTS=OFF \
+	-DLIBUNWIND_INSTALL_HEADERS=ON \
+	-DLIBUNWIND_ENABLE_ASSERTIONS=OFF \
+	-DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config \
+	-DLLVM_PATH=$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/llvm
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/libunwind-$(LLVMUNWIND_VER).src.tar.xz
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/llvm-project-$(LLVMUNWIND_VER).src.tar.xz
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
 	$(JLCHECKSUM) $<
 	cd $(dir $<) && $(TAR) xf $<
-	mv $(SRCCACHE)/libunwind-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)
+	mv $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-prologue-epilogue.patch-applied
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-revert-monorepo-requirement.patch
+$(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-force-dwarf.patch-applied
+	cd $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
 	echo 1 > $@
 
-$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-revert-monorepo-requirement.patch-applied
-	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-freebsd-libgcc-api-compat.patch
-	echo 1 > $@
-
-checksum-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+checksum-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
 	$(JLCHECKSUM) $<
 
-$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied
+$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/llvm-libunwind-freebsd-libgcc-api-compat.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(CMAKE) $(dir $<) $(LLVMUNWIND_OPTS)
+	$(CMAKE) $(dir $<)/libunwind $(LLVMUNWIND_OPTS)
 	echo 1 > $@
 
 $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
-	$(CMAKE) --build $(dir $<)
+	$(MAKE) -C $(dir $<)
 	echo 1 > $@
 
 $(eval $(call staged-install, \
 	llvmunwind,llvmunwind-$(LLVMUNWIND_VER), \
 	MAKE_INSTALL,,, \
-	cp -fR $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/include/* $(build_includedir)))
+	cp -fR $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/libunwind/* $(build_includedir)))
 
 clean-llvmunwind:
 	-rm -f $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
@@ -135,14 +140,14 @@ clean-llvmunwind:
 	-$(MAKE) -C $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) clean
 
 distclean-llvmunwind:
-	rm -rf $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz \
+	rm -rf $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz \
 		$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) \
 		$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)
 
-get-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
-extract-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
-configure-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
-compile-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
+get-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER).tar.xz
+extract-llvmunwind: $(SRCCACHE)/llvm-project-$(LLVMUNWIND_VER)/source-extracted
+configure-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-configured
+compile-llvmunwind: $(BUILDDIR)/llvm-project-$(LLVMUNWIND_VER)/build-compiled
 fastcheck-llvmunwind: check-llvmunwind
 check-llvmunwind: # no test/check provided by Makefile
 
diff --git a/deps/zlib.mk b/deps/zlib.mk
index 95168b115ef31..5548a0791f4d2 100644
--- a/deps/zlib.mk
+++ b/deps/zlib.mk
@@ -14,7 +14,7 @@ $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured: $(SRCCACHE)/$(ZLIB_SRC_DIR)/source
 	echo 1 > $@
 
 $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled: $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured
-	$(CMAKE) --build $(dir $<)
+	$(MAKE) -C $(dir $<) $(MAKE_COMMON)
 	echo 1 > $@
 
 $(eval $(call staged-install, \
diff --git a/doc/Manifest.toml b/doc/Manifest.toml
index 7b8bd47d42a47..c0f8b693bd1ac 100644
--- a/doc/Manifest.toml
+++ b/doc/Manifest.toml
@@ -219,7 +219,7 @@ uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 version = "1.11.0"
 
 [[deps.REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"]
+deps = ["InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 version = "1.11.0"
 
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index 049543d795acd..56cb690d66eeb 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -28,7 +28,7 @@
 julia - a high-level, high-performance dynamic programming language for technical computing
 
 .SH SYNOPSIS
-\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMMFILE] [ARGS...]
+\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMFILE] [ARGS...]
 
 If a Julia source file is given as a \fIPROGRAMFILE\fP (optionally followed by
 arguments in \fIARGS\fP) Julia will execute the program and exit.
@@ -283,12 +283,17 @@ Generate an incremental output file (rather than complete)
 
 .TP
 --trace-compile={stderr|name}
-Print precompile statements for methods compiled during execution or save to a path
+Print precompile statements for methods compiled during execution or save to stderr or a path.
+Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported
 
 .TP
 --trace-compile-timing=
 If --trace-compile is enabled show how long each took to compile in ms
 
+.TP
+--trace-dispatch={stderr|name}
+Print precompile statements for methods dispatched during execution or save to stderr or a path.
+
 .TP
 -image-codegen
 Force generate code in imaging mode
diff --git a/doc/src/base/arrays.md b/doc/src/base/arrays.md
index b01540d35def4..66fe5c78f1ee6 100644
--- a/doc/src/base/arrays.md
+++ b/doc/src/base/arrays.md
@@ -79,6 +79,7 @@ to operate on arrays, you should use `sin.(a)` to vectorize via `broadcast`.
 Base.broadcast
 Base.Broadcast.broadcast!
 Base.@__dot__
+Base.Broadcast.BroadcastFunction
 ```
 
 For specializing broadcast on custom types, see
@@ -137,6 +138,7 @@ Base.parentindices
 Base.selectdim
 Base.reinterpret
 Base.reshape
+Base.insertdims
 Base.dropdims
 Base.vec
 Base.SubArray
diff --git a/doc/src/base/base.md b/doc/src/base/base.md
index d7e7fff7cbda7..b5d50a846ce89 100644
--- a/doc/src/base/base.md
+++ b/doc/src/base/base.md
@@ -102,6 +102,11 @@ where
 ;
 =
 ?:
+.=
+.
+->
+::
+[]
 ```
 
 ## Standard Modules
@@ -277,6 +282,7 @@ Base.:(|>)
 Base.:(∘)
 Base.ComposedFunction
 Base.splat
+Base.Fix
 Base.Fix1
 Base.Fix2
 ```
@@ -307,7 +313,12 @@ Base.@simd
 Base.@polly
 Base.@generated
 Base.@assume_effects
+```
+
+## Managing deprecations
+```@docs
 Base.@deprecate
+Base.depwarn
 ```
 
 ## Missing Values
diff --git a/doc/src/base/file.md b/doc/src/base/file.md
index 22799f882bb26..300738a39322d 100644
--- a/doc/src/base/file.md
+++ b/doc/src/base/file.md
@@ -29,6 +29,7 @@ Base.Filesystem.operm
 Base.Filesystem.cp
 Base.download
 Base.Filesystem.mv
+Base.Filesystem.rename
 Base.Filesystem.rm
 Base.Filesystem.touch
 Base.Filesystem.tempname
diff --git a/doc/src/base/math.md b/doc/src/base/math.md
index 7091aa6f1aa87..4f816ce2a6c1d 100644
--- a/doc/src/base/math.md
+++ b/doc/src/base/math.md
@@ -166,6 +166,7 @@ Base.flipsign
 Base.sqrt(::Number)
 Base.isqrt
 Base.Math.cbrt(::AbstractFloat)
+Base.fourthroot(::Number)
 Base.real
 Base.imag
 Base.reim
diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md
index 58ec078a8e0cf..9f24db176b538 100644
--- a/doc/src/base/parallel.md
+++ b/doc/src/base/parallel.md
@@ -138,7 +138,7 @@ end
 
 ev = OneWayEvent()
 @sync begin
-    @async begin
+    Threads.@spawn begin
         wait(ev)
         println("done")
     end
diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
index ef470be6b55cc..a9637a1a7be3a 100644
--- a/doc/src/base/strings.md
+++ b/doc/src/base/strings.md
@@ -17,11 +17,6 @@ Core.String(::AbstractString)
 Base.SubString
 Base.LazyString
 Base.@lazy_str
-Base.AnnotatedString
-Base.AnnotatedChar
-Base.annotatedstring
-Base.annotations
-Base.annotate!
 Base.transcode
 Base.unsafe_string
 Base.ncodeunits(::AbstractString)
@@ -48,6 +43,9 @@ Base.:(==)(::AbstractString, ::AbstractString)
 Base.cmp(::AbstractString, ::AbstractString)
 Base.lpad
 Base.rpad
+Base.ltruncate
+Base.rtruncate
+Base.ctruncate
 Base.findfirst(::AbstractString, ::AbstractString)
 Base.findnext(::AbstractString, ::AbstractString, ::Integer)
 Base.findnext(::AbstractChar, ::AbstractString, ::Integer)
@@ -98,3 +96,17 @@ Base.escape_string
 Base.escape_raw_string
 Base.unescape_string
 ```
+
+## `AnnotatedString`s
+
+!!! note
+    The API for AnnotatedStrings is considered experimental and is subject to change between
+    Julia versions.
+
+```@docs
+Base.AnnotatedString
+Base.AnnotatedChar
+Base.annotatedstring
+Base.annotations
+Base.annotate!
+```
diff --git a/doc/src/devdocs/build/arm.md b/doc/src/devdocs/build/arm.md
index 747ee25d22a04..df9ede07d270f 100644
--- a/doc/src/devdocs/build/arm.md
+++ b/doc/src/devdocs/build/arm.md
@@ -55,18 +55,9 @@ due to unsupported inline assembly. In that case, add `MCPU=armv7-a` to
 
 ## AArch64 (ARMv8)
 
-Julia has been successfully built on the following ARMv8 devices:
+Julia is expected to work and build on ARMv8 cpus. One should follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md). Julia expects to have around 8GB of ram or swap enabled to build itself.
 
-* [nVidia Jetson TX1 & TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html);
-* [X-Gene 1](https://www.apm.com/products/data-center/x-gene-family/x-gene/);
-* [Overdrive 3000](https://softiron.com/products/overdrive-3000/);
-* [Cavium ThunderX](https://www.cavium.com/ThunderX_ARM_Processors.html) on [packet.net](https://www.packet.net).
-
-Compilation on `ARMv8-A` requires that `Make.user` is configured as follows:
-
-```
-MCPU=armv8-a
-```
+### Known issues
 
 Starting from Julia v1.10, [JITLink](https://llvm.org/docs/JITLink.html) is automatically enabled on this architecture for all operating systems when linking to LLVM 15 or later versions.
 Due to a [bug in LLVM memory manager](https://github.com/llvm/llvm-project/issues/63236), non-trivial workloads may generate too many memory mappings that on Linux can exceed the limit of memory mappings (`mmap`) set in the file `/proc/sys/vm/max_map_count`, resulting in an error like
@@ -77,21 +68,3 @@ Should this happen, ask your system administrator to increase the limit of memor
 ```
 sysctl -w vm.max_map_count=262144
 ```
-
-### nVidia Jetson TX2
-
-Julia builds and runs on the [nVidia Jetson TX2](https://www.nvidia.com/object/embedded-systems-dev-kits-modules.html)
-platform with minimal configuration changes.
-
-After configuring `Make.user` as per the `AArch64` instructions in this document,
-follow the general [build instructions](https://github.com/JuliaLang/julia/blob/master/README.md).
-The majority of the build dependencies specified in the instructions are installed by
-the default configuration flashed by [Jetpack 3.0](https://developer.nvidia.com/embedded/jetpack). The remaining tools can be installed by issuing the following command:
-
-```
-sudo apt-get install gfortran wget cmake
-```
-
-A full parallel build, including LLVM,
-will complete in around two hours. All tests pass and CUDA functionality is available
-through, e.g., [CUDAdrv](https://github.com/JuliaGPU/CUDAdrv.jl).
diff --git a/doc/src/devdocs/build/distributing.md b/doc/src/devdocs/build/distributing.md
index 99c08923b415b..ed06c20fa0df3 100644
--- a/doc/src/devdocs/build/distributing.md
+++ b/doc/src/devdocs/build/distributing.md
@@ -108,7 +108,7 @@ Alternatively, Julia may be built as a framework by invoking `make` with the
 Windows
 -------
 
-Instructions for reating a Julia distribution on Windows are described in the
+Instructions for creating a Julia distribution on Windows are described in the
 [build devdocs for Windows](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/windows.md).
 
 Notes on BLAS and LAPACK
diff --git a/doc/src/devdocs/gc.md b/doc/src/devdocs/gc.md
index 9b9038c9445f3..a45e8afb271ce 100644
--- a/doc/src/devdocs/gc.md
+++ b/doc/src/devdocs/gc.md
@@ -21,7 +21,7 @@ lists. Metadata for free pages, however, may be stored into three separate lock-
 
 Julia's pool allocator follows a "tiered" allocation discipline. When requesting a memory page for the pool allocator, Julia will:
 
-- Try to claim a page from `page_pool_lazily_freed`, which contains pages which were empty on the last stop-the-world phase, but not yet madivsed by a concurrent sweeper GC thread.
+- Try to claim a page from `page_pool_lazily_freed`, which contains pages which were empty on the last stop-the-world phase, but not yet madvised by a concurrent sweeper GC thread.
 
 - If it failed claiming a page from `page_pool_lazily_freed`, it will try to claim a page from `the page_pool_clean`, which contains pages which were mmaped on a previous page allocation request but never accessed.
 
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index ab8f7dde50022..c4b80f632cd4e 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -17,7 +17,7 @@ The code for lowering Julia AST to LLVM IR or interpreting it directly is in dir
 | `cgutils.cpp`                    | Lowering utilities, notably for array and tuple accesses           |
 | `codegen.cpp`                    | Top-level of code generation, pass list, lowering builtins         |
 | `debuginfo.cpp`                  | Tracks debug information for JIT code                              |
-| `disasm.cpp`                     | Handles native object file and JIT code diassembly                 |
+| `disasm.cpp`                     | Handles native object file and JIT code disassembly                 |
 | `gf.c`                           | Generic functions                                                  |
 | `intrinsics.cpp`                 | Lowering intrinsics                                                |
 | `jitlayers.cpp`                  | JIT-specific code, ORC compilation layers/utilities                |
diff --git a/doc/src/devdocs/object.md b/doc/src/devdocs/object.md
index a2f72d623ab21..8134132d6ee75 100644
--- a/doc/src/devdocs/object.md
+++ b/doc/src/devdocs/object.md
@@ -92,7 +92,7 @@ The corresponding global `jl_datatype_t` objects are created by [`jl_init_types`
 
 The garbage collector uses several bits from the metadata portion of the `jl_typetag_t` to track
 each object in the system. Further details about this algorithm can be found in the comments of
-the [garbage collector implementation in `gc.c`](https://github.com/JuliaLang/julia/blob/master/src/gc.c).
+the [garbage collector implementation in `gc-stock.c`](https://github.com/JuliaLang/julia/blob/master/src/gc-stock.c).
 
 ## Object allocation
 
@@ -179,7 +179,7 @@ jl_value_t *newstruct(jl_value_t *type);
 jl_value_t *newobj(jl_value_t *type, size_t nfields);
 ```
 
-And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc.c`),
+And at the lowest level, memory is getting allocated by a call to the garbage collector (in `gc-stock.c`),
 then tagged with its type:
 
 ```c
diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md
index 5a1af0d897bc6..a0e072c0b1ae3 100644
--- a/doc/src/devdocs/probes.md
+++ b/doc/src/devdocs/probes.md
@@ -206,7 +206,7 @@ Now we can start `bpftrace` and have it monitor `rt__new__task` for *only* this
 
 And if we spawn a single task:
 
-`@async 1+1`
+`Threads.@spawn 1+1`
 
 we see this task being created:
 
@@ -215,8 +215,8 @@ we see this task being created:
 However, if we spawn a bunch of tasks from that newly-spawned task:
 
 ```julia
-@async for i in 1:10
-   @async 1+1
+Threads.@spawn for i in 1:10
+   Threads.@spawn 1+1
 end
 ```
 
diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md
index 7d4f7afdbb86a..64c309e1fb02a 100644
--- a/doc/src/devdocs/sysimg.md
+++ b/doc/src/devdocs/sysimg.md
@@ -117,3 +117,80 @@ See code comments for each components for more implementation details.
     depending on the ISA. The target selection will prefer exact CPU name match,
     larger vector register size, and larger number of features.
     An overview of this process is in `src/processor.cpp`.
+
+## Trimming
+
+System images are typically quite large, since Base includes a lot of functionality, and by
+default system images also include several packages such as LinearAlgebra for convenience
+and backwards compatibility. Most programs will use only a fraction of the functions in
+these packages. Therefore it makes sense to build binaries that exclude unused functions
+to save space, referred to as "trimming".
+
+While the basic idea of trimming is sound, Julia has dynamic and reflective features that make it
+difficult (or impossible) to know in general which functions are unused. As an extreme example,
+consider code like
+
+```
+getglobal(Base, Symbol(readchomp(stdin)))(1)
+```
+
+This code reads a function name from `stdin` and calls the named function from Base on the value
+`1`. In this case it is impossible to predict which function will be called, so no functions
+can reliably be considered "unused". With some noteworthy exceptions (Julia's own REPL being
+one of them), most real-world programs do not do things like this.
+
+Less extreme cases occur, for example, when there are type instabilities that make it impossible
+for the compiler to predict which method will be called. However, if code is well-typed and does
+not use reflection, a complete and (hopefully) relatively small set of needed methods can be
+determined, and the rest can be removed. The `--trim` command-line option requests this kind of
+compilation.
+
+When `--trim` is specified in a command used to build a system image, the compiler begins
+tracing calls starting at methods marked using `Base.Experimental.entrypoint`. If a call is too
+dynamic to reasonably narrow down the possible call targets, an error is given at compile
+time showing the location of the call. For testing purposes, it is possible to skip these
+errors by specifying `--trim=unsafe` or `--trim=unsafe-warn`. Then you will get a system
+image built, but it may crash at run time if needed code is not present.
+
+It typically makes sense to specify `--strip-ir` along with `--trim`, since trimmed binaries
+are fully compiled and therefore don't need Julia IR. At some point we may make `--trim` imply
+`--strip-ir`, but for now we have kept them orthogonal.
+
+To get the smallest possible binary, it will also help to specify `--strip-metadata` and
+run the Unix `strip` utility. However, those steps remove Julia-specific and native (DWARF format)
+debug info, respectively, and so will make debugging more difficult.
+
+### Common problems
+
+- The Base global variables `stdin`, `stdout`, and `stderr` are non-constant and so their
+  types are not known. All printing should use a specific IO object with a known type.
+  The easiest substitution is to use `print(Core.stdout, x)` instead of `print(x)` or
+  `print(stdout, x)`.
+- Use tools like `JET`, `Cthulhu`, and/or `SnoopCompile` to identify failures of type-inference, and
+  follow our [Performance Tips](@ref) to fix them.
+
+### Compatibility concerns
+
+We have identified many small changes to Base that significantly increase the set of programs
+that can be reliably trimmed. Unfortunately some of those changes would be considered breaking,
+and so are only applied when trimming is requested (this is done by an external build script,
+currently maintained inside the test suite as `test/trimming/buildscript.jl`).
+Therefore in many cases trimming will require you to opt in to new variants of Base and some
+standard libraries.
+
+If you want to use trimming, it is important to set up continuous integration testing that
+performs a trimmed build and fully tests the resulting program.
+Fortunately, if your program successfully compiles with `--trim` then it is very likely to work
+the same as it did before. However, CI is needed to ensure that your program continues to build
+with trimming as you develop it.
+
+Package authors may wish to test that their package is "trimming safe", however this is impossible
+in general. Trimming is only expected to work given concrete entry points such as `main()` and
+library entry points meant to be called from outside Julia. For generic packages, existing tests
+for type stability like `@inferred` and `JET` are about as close as you can get to checking
+trim compatibility.
+
+Trimming also introduces new compatibility issues between minor versions of Julia. At this time,
+we are not able to guarantee that a program that can be trimmed in one version of Julia
+can also be trimmed in all future versions of Julia. However, breakage of that kind is expected
+to be rare. We also plan to try to *increase* the set of programs that can be trimmed over time.
diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md
index 15db6eda5f807..d1d095c48b2ff 100644
--- a/doc/src/manual/asynchronous-programming.md
+++ b/doc/src/manual/asynchronous-programming.md
@@ -64,8 +64,8 @@ the next input prompt appears. That is because the REPL is waiting for `t`
 to finish before proceeding.
 
 It is common to want to create a task and schedule it right away, so the
-macro [`@async`](@ref) is provided for that purpose --- `@async x` is
-equivalent to `schedule(@task x)`.
+macro [`Threads.@spawn`](@ref) is provided for that purpose --- `Threads.@spawn x` is
+equivalent to `task = @task x; task.sticky = false; schedule(task)`.
 
 ## Communicating with Channels
 
@@ -186,7 +186,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
 
     # we can schedule `n` instances of `foo` to be active concurrently.
     for _ in 1:n
-        errormonitor(@async foo())
+        errormonitor(Threads.@spawn foo())
     end
     ```
   * Channels are created via the `Channel{T}(sz)` constructor. The channel will only hold objects
@@ -264,10 +264,10 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
+julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for i in 1:4 # start 4 tasks to process requests in parallel
-           errormonitor(@async do_work())
+           errormonitor(Threads.@spawn do_work())
        end
 
 julia> @elapsed while n > 0 # print out results
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index 6f4d69b16bc81..b8d064f698208 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -996,7 +996,7 @@ A table of translations between the macro and function interfaces is given below
 |------------------------------------------------------------------------------|-----------------------------------------------------------------------------|
 | `@ccall clock()::Int32`                                                      | `ccall(:clock, Int32, ())`                                                  |
 | `@ccall f(a::Cint)::Cint`                                                    | `ccall(:a, Cint, (Cint,), a)`                                               |
-| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid`                               | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), (a, b))`                      |
+| `@ccall "mylib".f(a::Cint, b::Cdouble)::Cvoid`                               | `ccall((:f, "mylib"), Cvoid, (Cint, Cdouble), a, b)`                        |
 | `@ccall $fptr.f()::Cvoid`                                                    | `ccall(fptr, f, Cvoid, ())`                                                 |
 | `@ccall printf("%s = %d\n"::Cstring ; "foo"::Cstring, foo::Cint)::Cint`      | `<unavailable>`                                                             |
 | `@ccall printf("%s = %s\n"::Cstring ; "2 + 2"::Cstring, "5"::Cstring)::Cint` | `ccall(:printf, Cint, (Cstring, Cstring...), "%s = %s\n", "2 + 2", "5")`    |
diff --git a/doc/src/manual/command-line-interface.md b/doc/src/manual/command-line-interface.md
index d1ed576c42a4f..734d7031db5e8 100644
--- a/doc/src/manual/command-line-interface.md
+++ b/doc/src/manual/command-line-interface.md
@@ -214,11 +214,12 @@ The following is a complete list of command-line switches available when launchi
 |`--output-bc <name>`                   |Generate LLVM bitcode (.bc)|
 |`--output-asm <name>`                  |Generate an assembly file (.s)|
 |`--output-incremental={yes\|no*}`      |Generate an incremental output file (rather than complete)|
-|`--trace-compile={stderr\|name}`       |Print precompile statements for methods compiled during execution or save to a path|
+|`--trace-compile={stderr\|name}`       |Print precompile statements for methods compiled during execution or save to stderr or a path. Methods that were recompiled are printed in yellow or with a trailing comment if color is not supported|
 |`--trace-compile-timing`               |If --trace-compile is enabled show how long each took to compile in ms|
+|`--trace-dispatch={stderr\|name}`      |Print precompile statements for methods dispatched during execution or save to stderr or a path.|
 |`--image-codegen`                      |Force generate code in imaging mode|
 |`--permalloc-pkgimg={yes\|no*}`        |Copy the data section of package images into memory|
-
+|`--trim={no*\|safe\|unsafe\|unsafe-warn}` |Build a sysimage including only code provably reachable from methods marked by calling `entrypoint`. The three non-default options differ in how they handle dynamic call sites. In safe mode, such sites result in compile-time errors. In unsafe mode, such sites are allowed but the resulting binary might be missing needed code and can throw runtime errors. With unsafe-warn, such sites will trigger warnings at compile-time and might error at runtime.|
 
 !!! compat "Julia 1.1"
     In Julia 1.0, the default `--project=@.` option did not search up from the root
diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md
index d325239fc9e2d..f60dfb7004ada 100644
--- a/doc/src/manual/distributed-computing.md
+++ b/doc/src/manual/distributed-computing.md
@@ -123,7 +123,7 @@ An important thing to remember is that, once fetched, a [`Future`](@ref Distribu
 locally. Further [`fetch`](@ref) calls do not entail a network hop. Once all referencing [`Future`](@ref Distributed.Future)s
 have fetched, the remote stored value is deleted.
 
-[`@async`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We
+[`Threads.@spawn`](@ref) is similar to [`@spawnat`](@ref), but only runs tasks on the local process. We
 use it to create a "feeder" task for each process. Each task picks the next index that needs to
 be computed, then waits for its process to finish, then repeats until we run out of indices. Note
 that the feeder tasks do not begin to execute until the main task reaches the end of the [`@sync`](@ref)
@@ -657,7 +657,7 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
+julia> errormonitor(Threads.@spawn make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for p in workers() # start tasks on the workers to process requests in parallel
            remote_do(do_work, p, jobs, results)
@@ -896,7 +896,7 @@ conflicts. For example:
 ```julia
 @sync begin
     for p in procs(S)
-        @async begin
+        Threads.@spawn begin
             remotecall_wait(fill!, p, S, p)
         end
     end
@@ -978,7 +978,7 @@ and one that delegates in chunks:
 julia> function advection_shared!(q, u)
            @sync begin
                for p in procs(q)
-                   @async remotecall_wait(advection_shared_chunk!, p, q, u)
+                   Threads.@spawn remotecall_wait(advection_shared_chunk!, p, q, u)
                end
            end
            q
diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md
index 84f36144304aa..b86822e0be4b7 100644
--- a/doc/src/manual/environment-variables.md
+++ b/doc/src/manual/environment-variables.md
@@ -144,7 +144,7 @@ files, artifacts, etc. For example, to switch the user depot to `/foo/bar` just
 ```sh
 export JULIA_DEPOT_PATH="/foo/bar:"
 ```
-All package operations, like cloning registrise or installing packages, will now write to
+All package operations, like cloning registries or installing packages, will now write to
 `/foo/bar`, but since the empty entry is expanded to the default system depot, any bundled
 resources will still be available. If you really only want to use the depot at `/foo/bar`,
 and not load any bundled resources, simply set the environment variable to `/foo/bar`
@@ -267,6 +267,14 @@ versions of packages already installed as possible.
 !!! compat "Julia 1.9"
     This only affects Julia 1.9 and above.
 
+### [`JULIA_PKG_GC_AUTO`](@id JULIA_PKG_GC_AUTO)
+
+If set to `false`, automatic garbage collection of packages and artifacts will be disabled;
+see [`Pkg.gc`](https://pkgdocs.julialang.org/v1/api/#Pkg.gc) for more details.
+
+!!! compat "Julia 1.12"
+    This environment variable is only supported on Julia 1.12 and above.
+
 ## Network transport
 
 ### [`JULIA_NO_VERIFY_HOSTS`](@id JULIA_NO_VERIFY_HOSTS)
@@ -320,16 +328,25 @@ a master process to establish a connection before dying.
 
 ### [`JULIA_NUM_THREADS`](@id JULIA_NUM_THREADS)
 
-An unsigned 64-bit integer (`uint64_t`) that sets the maximum number of threads
-available to Julia. If `$JULIA_NUM_THREADS` is not positive or is not set, or
-if the number of CPU threads cannot be determined through system calls, then the
-number of threads is set to `1`.
+An unsigned 64-bit integer (`uint64_t`) or string that sets the maximum number
+of threads available to Julia. If `$JULIA_NUM_THREADS` is not set or is a
+non-positive integer, or if the number of CPU threads cannot be determined
+through system calls, then the number of threads is set to `1`.
 
 If `$JULIA_NUM_THREADS` is set to `auto`, then the number of threads will be set
-to the number of CPU threads.
+to the number of CPU threads. It can also be set to a comma-separated string to
+specify the size of the `:default` and `:interactive` [threadpools](@ref
+man-threadpools), respectively:
+```bash
+# 5 threads in the :default pool and 2 in the :interactive pool
+export JULIA_NUM_THREADS=5,2
+
+# `auto` threads in the :default pool and 1 in the :interactive pool
+export JULIA_NUM_THREADS=auto,1
+```
 
 !!! note
-    `JULIA_NUM_THREADS` must be defined before starting julia; defining it in
+    `JULIA_NUM_THREADS` must be defined before starting Julia; defining it in
     `startup.jl` is too late in the startup process.
 
 !!! compat "Julia 1.5"
@@ -339,6 +356,9 @@ to the number of CPU threads.
 !!! compat "Julia 1.7"
     The `auto` value for `$JULIA_NUM_THREADS` requires Julia 1.7 or above.
 
+!!! compat "Julia 1.9"
+    The `x,y` format for threadpools requires Julia 1.9 or above.
+
 ### [`JULIA_THREAD_SLEEP_THRESHOLD`](@id JULIA_THREAD_SLEEP_THRESHOLD)
 
 If set to a string that starts with the case-insensitive substring `"infinite"`,
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
index 8984e1d15ddd3..2673ca7532acf 100644
--- a/doc/src/manual/faq.md
+++ b/doc/src/manual/faq.md
@@ -943,7 +943,7 @@ Consider the printed output from the following:
 
 ```jldoctest
 julia> @sync for i in 1:3
-           @async write(stdout, string(i), " Foo ", " Bar ")
+           Threads.@spawn write(stdout, string(i), " Foo ", " Bar ")
        end
 123 Foo  Foo  Foo  Bar  Bar  Bar
 ```
@@ -956,7 +956,7 @@ in the above example results in:
 
 ```jldoctest
 julia> @sync for i in 1:3
-           @async println(stdout, string(i), " Foo ", " Bar ")
+           Threads.@spawn println(stdout, string(i), " Foo ", " Bar ")
        end
 1 Foo  Bar
 2 Foo  Bar
@@ -969,7 +969,7 @@ You can lock your writes with a `ReentrantLock` like this:
 julia> l = ReentrantLock();
 
 julia> @sync for i in 1:3
-           @async begin
+           Threads.@spawn begin
                lock(l)
                try
                    write(stdout, string(i), " Foo ", " Bar ")
diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md
index 9a91ea7467750..be81fe529ef7d 100644
--- a/doc/src/manual/functions.md
+++ b/doc/src/manual/functions.md
@@ -292,12 +292,12 @@ syntaxes:
 
 ```jldoctest
 julia> x -> x^2 + 2x - 1
-#1 (generic function with 1 method)
+#2 (generic function with 1 method)
 
 julia> function (x)
            x^2 + 2x - 1
        end
-#3 (generic function with 1 method)
+#5 (generic function with 1 method)
 ```
 
 Each statement creates a function taking one argument `x` and returning the value of the polynomial `x^2 +
diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md
index e34d5543389c1..2c69aabbda192 100644
--- a/doc/src/manual/getting-started.md
+++ b/doc/src/manual/getting-started.md
@@ -56,4 +56,4 @@ search: begin disable_sigint reenable_sigint
   begin...end denotes a block of code.
 ```
 
-If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips), or check out the comprehensive [ModernJuliaWorkflows](https://modernjuliaworkflows.github.io/) blog.
+If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips), or check out the comprehensive [ModernJuliaWorkflows](https://modernjuliaworkflows.org/) blog.
diff --git a/doc/src/manual/mathematical-operations.md b/doc/src/manual/mathematical-operations.md
index 1d613931669fc..d2cef68bd6fff 100644
--- a/doc/src/manual/mathematical-operations.md
+++ b/doc/src/manual/mathematical-operations.md
@@ -551,21 +551,22 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define
 
 ### Powers, logs and roots
 
-| Function                 | Description                                                                |
-|:------------------------ |:-------------------------------------------------------------------------- |
-| [`sqrt(x)`](@ref), `√x`  | square root of `x`                                                         |
-| [`cbrt(x)`](@ref), `∛x`  | cube root of `x`                                                           |
-| [`hypot(x, y)`](@ref)    | hypotenuse of right-angled triangle with other sides of length `x` and `y` |
-| [`exp(x)`](@ref)         | natural exponential function at `x`                                        |
-| [`expm1(x)`](@ref)       | accurate `exp(x) - 1` for `x` near zero                                    |
-| [`ldexp(x, n)`](@ref)    | `x * 2^n` computed efficiently for integer values of `n`                   |
-| [`log(x)`](@ref)         | natural logarithm of `x`                                                   |
-| [`log(b, x)`](@ref)      | base `b` logarithm of `x`                                                  |
-| [`log2(x)`](@ref)        | base 2 logarithm of `x`                                                    |
-| [`log10(x)`](@ref)       | base 10 logarithm of `x`                                                   |
-| [`log1p(x)`](@ref)       | accurate `log(1 + x)` for `x` near zero                                    |
-| [`exponent(x)`](@ref)    | binary exponent of `x`                                                     |
-| [`significand(x)`](@ref) | binary significand (a.k.a. mantissa) of a floating-point number `x`        |
+| Function                      | Description                                                                |
+|:----------------------------- |:-------------------------------------------------------------------------- |
+| [`sqrt(x)`](@ref), `√x`       | square root of `x`                                                         |
+| [`cbrt(x)`](@ref), `∛x`       | cube root of `x`                                                           |
+| [`fourthroot(x)`](@ref), `∜x` | fourth root of `x`                                                         |
+| [`hypot(x, y)`](@ref)         | hypotenuse of right-angled triangle with other sides of length `x` and `y` |
+| [`exp(x)`](@ref)              | natural exponential function at `x`                                        |
+| [`expm1(x)`](@ref)            | accurate `exp(x) - 1` for `x` near zero                                    |
+| [`ldexp(x, n)`](@ref)         | `x * 2^n` computed efficiently for integer values of `n`                   |
+| [`log(x)`](@ref)              | natural logarithm of `x`                                                   |
+| [`log(b, x)`](@ref)           | base `b` logarithm of `x`                                                  |
+| [`log2(x)`](@ref)             | base 2 logarithm of `x`                                                    |
+| [`log10(x)`](@ref)            | base 10 logarithm of `x`                                                   |
+| [`log1p(x)`](@ref)            | accurate `log(1 + x)` for `x` near zero                                    |
+| [`exponent(x)`](@ref)         | binary exponent of `x`                                                     |
+| [`significand(x)`](@ref)      | binary significand (a.k.a. mantissa) of a floating-point number `x`        |
 
 For an overview of why functions like [`hypot`](@ref), [`expm1`](@ref), and [`log1p`](@ref)
 are necessary and useful, see John D. Cook's excellent pair of blog posts on the subject: [expm1, log1p, erfc](https://www.johndcook.com/blog/2010/06/07/math-library-functions-that-seem-unnecessary/),
diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md
index d45644bf55842..6be44dcf4fa13 100644
--- a/doc/src/manual/methods.md
+++ b/doc/src/manual/methods.md
@@ -614,7 +614,7 @@ Start some other operations that use `f(x)`:
 julia> g(x) = f(x)
 g (generic function with 1 method)
 
-julia> t = @async f(wait()); yield();
+julia> t = Threads.@spawn f(wait()); yield();
 ```
 
 Now we add some new methods to `f(x)`:
@@ -639,7 +639,7 @@ julia> g(1)
 julia> fetch(schedule(t, 1))
 "original definition"
 
-julia> t = @async f(wait()); yield();
+julia> t = Threads.@spawn f(wait()); yield();
 
 julia> fetch(schedule(t, 1))
 "definition for Int"
diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md
index 45bf60a7944d2..35ba7fdf16601 100644
--- a/doc/src/manual/networking-and-streams.md
+++ b/doc/src/manual/networking-and-streams.md
@@ -233,7 +233,7 @@ Let's first create a simple server:
 ```julia-repl
 julia> using Sockets
 
-julia> errormonitor(@async begin
+julia> errormonitor(Threads.@spawn begin
            server = listen(2000)
            while true
                sock = accept(server)
@@ -305,11 +305,11 @@ printed the message and waited for the next client. Reading and writing works in
 To see this, consider the following simple echo server:
 
 ```julia-repl
-julia> errormonitor(@async begin
+julia> errormonitor(Threads.@spawn begin
            server = listen(2001)
            while true
                sock = accept(server)
-               @async while isopen(sock)
+               Threads.@spawn while isopen(sock)
                    write(sock, readline(sock, keep=true))
                end
            end
@@ -319,7 +319,7 @@ Task (runnable) @0x00007fd31dc12e60
 julia> clientside = connect(2001)
 TCPSocket(RawFD(28) open, 0 bytes waiting)
 
-julia> errormonitor(@async while isopen(clientside)
+julia> errormonitor(Threads.@spawn while isopen(clientside)
            write(stdout, readline(clientside, keep=true))
        end)
 Task (runnable) @0x00007fd31dc11870
@@ -357,10 +357,10 @@ ip"74.125.226.225"
 
 All I/O operations exposed by [`Base.read`](@ref) and [`Base.write`](@ref) can be performed
 asynchronously through the use of [coroutines](@ref man-tasks). You can create a new coroutine to
-read from or write to a stream using the [`@async`](@ref) macro:
+read from or write to a stream using the [`Threads.@spawn`](@ref) macro:
 
 ```julia-repl
-julia> task = @async open("foo.txt", "w") do io
+julia> task = Threads.@spawn open("foo.txt", "w") do io
            write(io, "Hello, World!")
        end;
 
@@ -379,7 +379,7 @@ your program to block until all of the coroutines it wraps around have exited:
 julia> using Sockets
 
 julia> @sync for hostname in ("google.com", "github.com", "julialang.org")
-           @async begin
+           Threads.@spawn begin
                conn = connect(hostname, 80)
                write(conn, "GET / HTTP/1.1\r\nHost:$(hostname)\r\n\r\n")
                readline(conn, keep=true)
diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md
index 181fe0a30eb38..33285bde8a066 100644
--- a/doc/src/manual/noteworthy-differences.md
+++ b/doc/src/manual/noteworthy-differences.md
@@ -220,8 +220,8 @@ For users coming to Julia from R, these are some noteworthy differences:
   * Unlike Python, Julia allows [AbstractArrays with arbitrary indexes](https://julialang.org/blog/2017/04/offset-arrays/).
     Python's special interpretation of negative indexing, `a[-1]` and `a[-2]`, should be written
     `a[end]` and `a[end-1]` in Julia.
-  * Julia requires `end` for indexing until the last element. `x[1:]` in Python is equivalent to `x[2:end]` in Julia.
-  * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing.
+  * Julia requires `end` for indexing until the last element. `x[2:end]` in Julia is equivalent to `x[1:]` in Python.
+  * In Julia, `:` before any object creates a [`Symbol`](@ref) or *quotes* an expression; so, `x[:5]` is the same as `x[5]`. If you want to get the first `n` elements of an array, then use range indexing.
   * Julia's range indexing has the format of `x[start:step:stop]`, whereas Python's format is `x[start:(stop+1):step]`. Hence, `x[0:10:2]` in Python is equivalent to `x[1:2:10]` in Julia. Similarly, `x[::-1]` in Python, which refers to the reversed array, is equivalent to `x[end:-1:1]` in Julia.
   * In Julia, ranges can be constructed independently as `start:step:stop`, the same syntax it uses
     in array-indexing. The `range` function is also supported.
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md
index 5c10652eb99cb..3033720b5df8c 100644
--- a/doc/src/manual/performance-tips.md
+++ b/doc/src/manual/performance-tips.md
@@ -1394,6 +1394,125 @@ Prominent examples include [MKL.jl](https://github.com/JuliaLinearAlgebra/MKL.jl
 These are external packages, so we will not discuss them in detail here.
 Please refer to their respective documentations (especially because they have different behaviors than OpenBLAS with respect to multithreading).
 
+## Execution latency, package loading and package precompiling time
+
+### Reducing time to first plot etc.
+
+The first time a julia method is called it (and any methods it calls, or ones that can be statically determined) will be
+compiled. The [`@time`](@ref) macro family illustrates this.
+
+```
+julia> foo() = rand(2,2) * rand(2,2)
+foo (generic function with 1 method)
+
+julia> @time @eval foo();
+  0.252395 seconds (1.12 M allocations: 56.178 MiB, 2.93% gc time, 98.12% compilation time)
+
+julia> @time @eval foo();
+  0.000156 seconds (63 allocations: 2.453 KiB)
+```
+
+Note that `@time @eval` is better for measuring compilation time because without [`@eval`](@ref), some compilation may
+already be done before timing starts.
+
+When developing a package, you may be able to improve the experience of your users with *precompilation*
+so that when they use the package, the code they use is already compiled. To precompile package code effectively, it's
+recommended to use [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) to run a
+"precompile workload" during precompilation time that is representative of typical package usage, which will cache the
+native compiled code into the package `pkgimage` cache, greatly reducing "time to first execution" (often referred to as
+TTFX) for such usage.
+
+Note that [`PrecompileTools.jl`](https://julialang.github.io/PrecompileTools.jl/stable/) workloads can be
+disabled and sometimes configured via Preferences if you do not want to spend the extra time precompiling, which
+may be the case during development of a package.
+
+### Reducing package loading time
+
+Keeping the time taken to load the package down is usually helpful.
+General good practice for package developers includes:
+
+1. Reduce your dependencies to those you really need. Consider using [package extensions](@ref) to support interoperability with other packages without bloating your essential dependencies.
+3. Avoid use of [`__init__()`](@ref) functions unless there is no alternative, especially those which might trigger a lot
+   of compilation, or just take a long time to execute.
+4. Where possible, fix [invalidations](https://julialang.org/blog/2020/08/invalidations/) among your dependencies and from your package code.
+
+The tool [`@time_imports`](@ref) can be useful in the REPL to review the above factors.
+
+```julia-repl
+julia> @time @time_imports using Plots
+      0.5 ms  Printf
+     16.4 ms  Dates
+      0.7 ms  Statistics
+               ┌ 23.8 ms SuiteSparse_jll.__init__() 86.11% compilation time (100% recompilation)
+     90.1 ms  SuiteSparse_jll 91.57% compilation time (82% recompilation)
+      0.9 ms  Serialization
+               ┌ 39.8 ms SparseArrays.CHOLMOD.__init__() 99.47% compilation time (100% recompilation)
+    166.9 ms  SparseArrays 23.74% compilation time (100% recompilation)
+      0.4 ms  Statistics → SparseArraysExt
+      0.5 ms  TOML
+      8.0 ms  Preferences
+      0.3 ms  PrecompileTools
+      0.2 ms  Reexport
+... many deps omitted for example ...
+      1.4 ms  Tar
+               ┌ 73.8 ms p7zip_jll.__init__() 99.93% compilation time (100% recompilation)
+     79.4 ms  p7zip_jll 92.91% compilation time (100% recompilation)
+               ┌ 27.7 ms GR.GRPreferences.__init__() 99.77% compilation time (100% recompilation)
+     43.0 ms  GR 64.26% compilation time (100% recompilation)
+               ┌ 2.1 ms Plots.__init__() 91.80% compilation time (100% recompilation)
+    300.9 ms  Plots 0.65% compilation time (100% recompilation)
+  1.795602 seconds (3.33 M allocations: 190.153 MiB, 7.91% gc time, 39.45% compilation time: 97% of which was recompilation)
+
+```
+
+Notice that in this example there are multiple packages loaded, some with `__init__()` functions, some of which cause
+compilation of which some is recompilation. Recompilation is caused by earlier packages invalidating methods, then in
+these cases when the following packages run their `__init__()` function some hit recompilation before the code can be run.
+
+Further, note the `Statistics` extension `SparseArraysExt` has been activated because `SparseArrays` is in the dependency
+tree. i.e. see `0.4 ms  Statistics → SparseArraysExt`.
+
+This report gives a good opportunity to review whether the cost of dependency load time is worth the functionality it brings.
+Also the `Pkg` utility `why` can be used to report why a an indirect dependency exists.
+
+```
+(CustomPackage) pkg> why FFMPEG_jll
+  Plots → FFMPEG → FFMPEG_jll
+  Plots → GR → GR_jll → FFMPEG_jll
+```
+
+or to see the indirect dependencies that a package brings in, you can `pkg> rm` the package, see the deps that are removed
+from the manifest, then revert the change with `pkg> undo`.
+
+If loading time is dominated by slow `__init__()` methods having compilation, one verbose way to identify what is being
+compiled is to use the julia args `--trace-compile=stderr --trace-compile-timing` which will report a [`precompile`](@ref)
+statement each time a method is compiled, along with how long compilation took. The InteractiveUtils macro
+[`@trace_compile`](@ref) provides a way to enable those args for a specific call. So a call for a complete report report would look like:
+
+```
+julia> @time @time_imports @trace_compile using CustomPackage
+...
+```
+
+Note the `--startup-file=no` which helps isolate the test from packages you may have in your `startup.jl`.
+
+More analysis of the reasons for recompilation can be achieved with the
+[`SnoopCompile`](https://github.com/timholy/SnoopCompile.jl) package.
+
+### Reducing precompilation time
+
+If package precompilation is taking a long time, one option is to set the following internal and then precompile.
+```
+julia> Base.PRECOMPILE_TRACE_COMPILE[] = "stderr"
+
+pkg> precompile
+```
+
+This has the effect of setting `--trace-compile=stderr --trace-compile-timing` in the precompilation processes themselves,
+so will show which methods are precompiled and how long they took to precompile.
+
+There are also profiling options such as [using the external profiler Tracy to profile the precompilation process](@ref Profiling-package-precompilation-with-Tracy).
+
 
 ## Miscellaneous
 
@@ -1418,7 +1537,7 @@ be modified as suggested by the warnings.
 Sometimes you can enable better optimization by promising certain program properties.
 
   * Use [`@inbounds`](@ref) to eliminate array bounds checking within expressions. Be certain before doing
-    this. If the subscripts are ever out of bounds, you may suffer crashes or silent corruption.
+    this. If the indices are ever out of bounds, you may suffer crashes or silent corruption.
   * Use [`@fastmath`](@ref) to allow floating point optimizations that are correct for real numbers, but lead
     to differences for IEEE numbers. Be careful when doing this, as this may change numerical results.
     This corresponds to the `-ffast-math` option of clang.
@@ -1723,7 +1842,7 @@ using Distributed
 responses = Vector{Any}(undef, nworkers())
 @sync begin
     for (idx, pid) in enumerate(workers())
-        @async responses[idx] = remotecall_fetch(foo, pid, args...)
+        Threads.@spawn responses[idx] = remotecall_fetch(foo, pid, args...)
     end
 end
 ```
diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md
index 4a9803337990b..1f9f3129ca16b 100644
--- a/doc/src/manual/running-external-programs.md
+++ b/doc/src/manual/running-external-programs.md
@@ -332,8 +332,8 @@ will attempt to store the data in the kernel's buffers while waiting for a reade
 Another common solution is to separate the reader and writer of the pipeline into separate [`Task`](@ref)s:
 
 ```julia
-writer = @async write(process, "data")
-reader = @async do_compute(read(process, String))
+writer = Threads.@spawn write(process, "data")
+reader = Threads.@spawn do_compute(read(process, String))
 wait(writer)
 fetch(reader)
 ```
diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md
index 5ba27b3921cec..c04e5e6d6760e 100644
--- a/doc/src/manual/strings.md
+++ b/doc/src/manual/strings.md
@@ -1207,6 +1207,10 @@ last backslash escapes a quote, since these backslashes appear before a quote.
 
 ## [Annotated Strings](@id man-annotated-strings)
 
+!!! note
+    The API for AnnotatedStrings is considered experimental and is subject to change between
+    Julia versions.
+
 It is sometimes useful to be able to hold metadata relating to regions of a
 string. A [`AnnotatedString`](@ref Base.AnnotatedString) wraps another string and
 allows for regions of it to be annotated with labelled values (`:label => value`).
diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md
index 85a83120dc517..64a12ea88c7dd 100644
--- a/doc/src/manual/variables-and-scoping.md
+++ b/doc/src/manual/variables-and-scoping.md
@@ -743,7 +743,7 @@ ERROR: invalid redefinition of constant x
 julia> const y = 1.0
 1.0
 
-julia> y = 2.0
+julia> const y = 2.0
 WARNING: redefinition of constant y. This may fail, cause incorrect answers, or produce other errors.
 2.0
 ```
@@ -755,34 +755,13 @@ julia> const z = 100
 julia> z = 100
 100
 ```
-The last rule applies to immutable objects even if the variable binding would change, e.g.:
-```julia-repl
-julia> const s1 = "1"
-"1"
-
-julia> s2 = "1"
-"1"
-
-julia> pointer.([s1, s2], 1)
-2-element Array{Ptr{UInt8},1}:
- Ptr{UInt8} @0x00000000132c9638
- Ptr{UInt8} @0x0000000013dd3d18
-
-julia> s1 = s2
-"1"
-
-julia> pointer.([s1, s2], 1)
-2-element Array{Ptr{UInt8},1}:
- Ptr{UInt8} @0x0000000013dd3d18
- Ptr{UInt8} @0x0000000013dd3d18
-```
-However, for mutable objects the warning is printed as expected:
+* if an assignment would change the mutable object to which the variable points (regardless of whether those two objects are deeply equal), a warning is printed:
 ```jldoctest
 julia> const a = [1]
 1-element Vector{Int64}:
  1
 
-julia> a = [1]
+julia> const a = [1]
 WARNING: redefinition of constant a. This may fail, cause incorrect answers, or produce other errors.
 1-element Vector{Int64}:
  1
@@ -803,7 +782,7 @@ f (generic function with 1 method)
 julia> f()
 1
 
-julia> x = 2
+julia> const x = 2
 WARNING: redefinition of constant x. This may fail, cause incorrect answers, or produce other errors.
 2
 
diff --git a/src/Makefile b/src/Makefile
index 53ea9c8bccbf9..308678662c879 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -18,7 +18,6 @@ FLAGS := \
 	-I$(SRCDIR)/flisp -I$(SRCDIR)/support \
 	-I$(LIBUV_INC) -I$(build_includedir) \
 	-I$(JULIAHOME)/deps/valgrind
-
 FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \
 		 -Wno-comment -Wpointer-arith -Wundef
 ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result))
@@ -49,10 +48,9 @@ SRCS := \
 	jltypes gf typemap smallintset ast builtins module interpreter symbol \
 	dlload sys init task array genericmemory staticdata toplevel jl_uv datatype \
 	simplevector runtime_intrinsics precompile jloptions mtarraylist \
-	threading scheduler stackwalk gc-common gc gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler \
-	mmtk-gc method jlapi signal-handling safepoint timing subtype rtutils \
-	gc-heap-snapshot crc32c APInt-C processor ircode opaque_closure codegen-stubs \
-	coverage runtime_ccall engine
+	threading scheduler stackwalk gc-common gc-stock gc-mmtk gc-debug gc-pages gc-stacks gc-alloc-profiler gc-page-profiler \
+	method jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
+	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall engine
 
 RT_LLVMLINK :=
 CG_LLVMLINK :=
@@ -109,7 +107,7 @@ ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-tls.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,work-stealing-queue.h gc-interface.h gc-tls.h gc-tls-common.h gc-tls-mmtk.h julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
 ifeq ($(OS),WINNT)
 PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
 endif
@@ -241,6 +239,16 @@ $(BUILDDIR)/%.h.gen : $(SRCDIR)/%.d
 	sed 's/JULIA_/JL_PROBE_/' $@ > $@.tmp
 	mv $@.tmp $@
 
+# Compile files from the binding side and copy so file into lib folder
+ifeq ($(WITH_MMTK), 1)
+$(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
+$(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+$(MMTK_LIB_DST): $(MMTK_LIB_SRC)
+	@$(call PRINT_MMTK, cp $< $@)
+endif
+
 $(BUILDDIR)/jl_internal_funcs.inc: $(SRCDIR)/jl_exported_funcs.inc
 	# Generate `.inc` file that contains a list of `#define` macros to rename functions defined in `libjulia-internal`
 	# to have a `ijl_` prefix instead of `jl_`, to denote that they are coming from `libjulia-internal`.  This avoids
@@ -262,15 +270,6 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.d
 $(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
 	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
 
-ifeq ($(WITH_MMTK), 1)
-$(MMTK_JULIA_INC)/%.o: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
-$(MMTK_JULIA_INC)/%.dbg.obj: $(MMTK_JULIA_INC)/%.c $(HEADERS) | $(MMTK_JULIA_INC)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
-$(MMTK_LIB_DST): $(MMTK_LIB_SRC)
-	@$(call PRINT_MMTK, cp $< $@)
-endif
-
 # public header rules
 $(eval $(call dir_target,$(build_includedir)/julia))
 define public_header_target
@@ -311,10 +310,10 @@ endif
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@
 
 $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT): $(SRCDIR)/ccalllazyfoo.c
-	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazyfoo.$(SHLIB_EXT)))
+	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazyfoo.$(SHLIB_EXT)))
 
 $(build_shlibdir)/libccalllazybar.$(SHLIB_EXT): $(SRCDIR)/ccalllazybar.c $(build_shlibdir)/libccalllazyfoo.$(SHLIB_EXT)
-	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,ccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo)
+	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(call SONAME_FLAGS,libccalllazybar.$(SHLIB_EXT)) -lccalllazyfoo)
 
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE)
 	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK)) -lpthread
@@ -340,13 +339,12 @@ $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
 $(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc
 $(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h)
 $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
-$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
-$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
-$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h
-$(BUILDDIR)/mmtk-gc.o $(BUILDDIR)/mmtk-gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
-$(BUILDDIR)/gc-common.o $(BUILDDIR)/gc-common.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
-$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h
-$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h
+$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h
+$(BUILDDIR)/gc-mmtk.o $(BUILDDIR)/gc-mmtk.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
+$(BUILDDIR)/gc-stock.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc-common.h $(SRCDIR)/gc-stock.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(SRCDIR)/gc-page-profiler.h
+$(BUILDDIR)/gc-heap-snapshot.o $(BUILDDIR)/gc-heap-snapshot.dbg.obj: $(SRCDIR)/gc-heap-snapshot.h
+$(BUILDDIR)/gc-alloc-profiler.o $(BUILDDIR)/gc-alloc-profiler.dbg.obj: $(SRCDIR)/gc-alloc-profiler.h
 $(BUILDDIR)/gc-page-profiler.o $(BUILDDIR)/gc-page-profiler.dbg.obj: $(SRCDIR)/gc-page-profiler.h
 $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h
@@ -357,10 +355,10 @@ $(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDI
 $(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
 $(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h
 $(BUILDDIR)/llvm-demote-float16.o $(BUILDDIR)/llvm-demote-float16.dbg.obj: $(SRCDIR)/jitlayers.h
-$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h
 $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
 $(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h
-$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
+$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-gc-interface-passes.h
 $(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h
 $(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/llvm-codegen-shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-codegen-shared.h
@@ -374,7 +372,7 @@ $(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h $(SRCDIR)/common_symbols1.inc $(SRCDIR)/common_symbols2.inc
 $(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h
 
-$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h)
+$(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc-common.o gc-stock.o gc.dbg.obj init.c init.dbg.obj task.o task.dbg.obj): $(addprefix $(SRCDIR)/,threading.h)
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
 
 # archive library file rules
@@ -411,21 +409,21 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 
 CXXLD = $(CXX) -shared
 
-$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in
+$(BUILDDIR)/julia.expmap: $(SRCDIR)/julia.expmap.in $(JULIAHOME)/VERSION $(LLVM_CONFIG_ABSOLUTE)
 	sed <'$<' >'$@' -e "s/@JULIA_SHLIB_SYMBOL_VERSION@/JL_LIBJULIA_$(SOMAJOR)/" \
 		        -e "s/@LLVM_SHLIB_SYMBOL_VERSION@/$(LLVM_SHLIB_SYMBOL_VERSION)/"
 
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(OBJS) $(MMTK_OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(MMTK_OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
+		$(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@
-		$(DSYMUTIL) $@
+	$(DSYMUTIL) $@
 
 $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(DOBJS) $(MMTK_DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(MMTK_DOBJS) $(RPATH_LIB) -o $@ \
 		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
-		$(DSYMUTIL) $@
+	$(DSYMUTIL) $@
 
 ifneq ($(OS), WINNT)
 $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_SHLIB_EXT): \
@@ -445,7 +443,7 @@ libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS)
 
 $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(BUILDDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT)
 	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \
-		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
+		$(JLDFLAGS) $(BOLT_LDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
 	@$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
@@ -480,7 +478,8 @@ clean:
 	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
 	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen
 	-rm -f $(BUILDDIR)/julia.expmap
-	-rm -f $(BUILDDIR)/julia_version.h $(MMTK_OBJS) $(MMTK_DOBJS)
+	-rm -f $(BUILDDIR)/julia_version.h
+	-rm -f $(MMTK_OBJS) $(MMTK_DOBJS)
 
 clean-flisp:
 	-$(MAKE) -C $(SRCDIR)/flisp clean BUILDDIR='$(abspath $(BUILDDIR)/flisp)'
diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp
index 7c31b6606139a..0a193ee132556 100644
--- a/src/abi_aarch64.cpp
+++ b/src/abi_aarch64.cpp
@@ -16,7 +16,7 @@ struct ABI_AArch64Layout : AbiLayout {
 Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields > 0`
     if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
         return nullptr;
     size_t nfields = dt->layout->nfields;
@@ -62,7 +62,7 @@ Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->isbitsegal && dt->nfields == 0`
     Type *lltype;
     // Check size first since it's cheaper.
     switch (jl_datatype_size(dt)) {
@@ -88,7 +88,7 @@ Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    if (dt->name->mutabl || dt->layout->npointers || dt->layout->flags.haspadding)
+    if (dt->name->mutabl || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
         return nullptr;
     return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
 }
@@ -184,7 +184,7 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
     // uniquely addressable members.
     // Maximum HFA and HVA size is 64 bytes (4 x fp128 or 16bytes vector)
     size_t dsz = jl_datatype_size(dt);
-    if (dsz > 64 || !dt->layout || dt->layout->npointers || dt->layout->flags.haspadding)
+    if (dsz > 64 || !dt->layout || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
         return NULL;
     nele = 0;
     ElementType eltype;
diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp
index 68f980d7b40da..8839a37da6e13 100644
--- a/src/abi_arm.cpp
+++ b/src/abi_arm.cpp
@@ -82,7 +82,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const
     if (jl_is_structtype(dt)) {
         // Fast path checks before descending the type hierarchy
         // (4 x 128b vector == 64B max size)
-        if (jl_datatype_size(dt) > 64 || dt->layout->npointers || dt->layout->flags.haspadding)
+        if (jl_datatype_size(dt) > 64 || dt->layout->npointers || !dt->layout->flags.isbitsegal || dt->layout->flags.haspadding)
             return 0;
 
         base = NULL;
diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp
index 1f10817cfeeee..f02e1022ddc2d 100644
--- a/src/abi_ppc64le.cpp
+++ b/src/abi_ppc64le.cpp
@@ -44,7 +44,7 @@ struct ABI_PPC64leLayout : AbiLayout {
 // count the homogeneous floating aggregate size (saturating at max count of 8)
 unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
 {
-    if (jl_datatype_size(ty) > 128 || ty->layout->npointers || ty->layout->flags.haspadding)
+    if (jl_datatype_size(ty) > 128 || ty->layout->npointers || !ty->layout->flags.isbitsegal || ty->layout->flags.haspadding)
         return 9;
 
     size_t i, l = ty->layout->nfields;
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 815e305d14376..c2f112f9c9d5c 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -19,24 +19,9 @@
 
 // analysis passes
 #include <llvm/Analysis/Passes.h>
-#include <llvm/Analysis/BasicAliasAnalysis.h>
-#include <llvm/Analysis/TypeBasedAliasAnalysis.h>
-#include <llvm/Analysis/ScopedNoAliasAA.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/Verifier.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Vectorize.h>
-#include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
-#include <llvm/Transforms/Instrumentation/MemorySanitizer.h>
-#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
-#include <llvm/Transforms/Scalar/GVN.h>
-#include <llvm/Transforms/IPO/AlwaysInliner.h>
-#include <llvm/Transforms/InstCombine/InstCombine.h>
-#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
-#include <llvm/Transforms/Scalar/SimpleLoopUnswitch.h>
-#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
 #include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <llvm/Passes/PassBuilder.h>
 #include <llvm/Passes/PassPlugin.h>
@@ -110,6 +95,17 @@ void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
     }
 }
 
+extern "C" JL_DLLEXPORT_CODEGEN
+void jl_get_llvm_mis_impl(void *native_code, arraylist_t* MIs)
+{
+    jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
+    auto map = data->jl_fvar_map;
+    for (auto &ci : map) {
+        jl_method_instance_t *mi = ci.first->def;
+        arraylist_push(MIs, mi);
+    }
+}
+
 extern "C" JL_DLLEXPORT_CODEGEN
 void jl_get_llvm_gvs_impl(void *native_code, arraylist_t *gvs)
 {
@@ -299,7 +295,8 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
     jl_value_t *ci = cgparams.lookup(mi, world, world);
     JL_GC_PROMISE_ROOTED(ci);
     jl_code_instance_t *codeinst = NULL;
-    if (ci != jl_nothing) {
+    JL_GC_PUSH1(&codeinst);
+    if (ci != jl_nothing && jl_atomic_load_relaxed(&((jl_code_instance_t *)ci)->inferred) != jl_nothing) {
         codeinst = (jl_code_instance_t*)ci;
     }
     else {
@@ -316,9 +313,11 @@ jl_code_instance_t *jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_
                 jl_mi_cache_insert(mi, codeinst);
         }
     }
+    JL_GC_POP();
     return codeinst;
 }
 
+arraylist_t new_invokes;
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup, and can
 // also be used be extern consumers like GPUCompiler.jl to obtain a module containing
@@ -368,8 +367,12 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
     params.imaging_mode = imaging;
     params.debug_level = cgparams->debug_info_level;
     params.external_linkage = _external_linkage;
+    arraylist_new(&new_invokes, 0);
     size_t compile_for[] = { jl_typeinf_world, _world };
-    for (int worlds = 0; worlds < 2; worlds++) {
+    int worlds = 0;
+    if (jl_options.trim != JL_TRIM_NO)
+        worlds = 1;
+    for (; worlds < 2; worlds++) {
         JL_TIMING(NATIVE_AOT, NATIVE_Codegen);
         size_t this_world = compile_for[worlds];
         if (!this_world)
@@ -388,6 +391,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
                 continue;
             }
             mi = (jl_method_instance_t*)item;
+compile_mi:
             src = NULL;
             // if this method is generally visible to the current compilation world,
             // and this is either the primary world, or not applicable in the primary world
@@ -395,16 +399,47 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
             if (jl_atomic_load_relaxed(&mi->def.method->primary_world) <= this_world && this_world <= jl_atomic_load_relaxed(&mi->def.method->deleted_world)) {
                 // find and prepare the source code to compile
                 jl_code_instance_t *codeinst = jl_ci_cache_lookup(*cgparams, mi, this_world);
-                if (codeinst && !params.compiled_functions.count(codeinst)) {
+                if (jl_options.trim != JL_TRIM_NO && !codeinst) {
+                    // If we're building a small image, we need to compile everything
+                    // to ensure that we have all the information we need.
+                    jl_safe_printf("Codegen decided not to compile code root");
+                    jl_(mi);
+                    abort();
+                }
+                if (codeinst && !params.compiled_functions.count(codeinst) && !data->jl_fvar_map.count(codeinst)) {
                     // now add it to our compilation results
-                    JL_GC_PROMISE_ROOTED(codeinst->rettype);
-                    orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def),
-                            params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
-                            Triple(clone.getModuleUnlocked()->getTargetTriple()));
-                    jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
-                    if (result_m)
-                        params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
+                    // Const returns do not do codegen, but juliac inspects codegen results so make a dummy fvar entry to represent it
+                    if (jl_options.trim != JL_TRIM_NO && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
+                        data->jl_fvar_map[codeinst] = std::make_tuple((uint32_t)-3, (uint32_t)-3);
+                    } else {
+                        JL_GC_PROMISE_ROOTED(codeinst->rettype);
+                        orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(codeinst->def),
+                                params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
+                                Triple(clone.getModuleUnlocked()->getTargetTriple()));
+                        jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, NULL, params);
+                        if (result_m)
+                            params.compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
+                        else if (jl_options.trim != JL_TRIM_NO) {
+                            // if we're building a small image, we need to compile everything
+                            // to ensure that we have all the information we need.
+                            jl_safe_printf("codegen failed to compile code root");
+                            jl_(mi);
+                            abort();
+                        }
+                    }
                 }
+            } else if (this_world != jl_typeinf_world) {
+                /*
+                jl_safe_printf("Codegen could not find requested codeinstance to be compiled\n");
+                jl_(mi);
+                abort();
+                */
+            }
+            // TODO: is goto the best way to do this?
+            jl_compile_workqueue(params, policy);
+            mi = (jl_method_instance_t*)arraylist_pop(&new_invokes);
+            if (mi != NULL) {
+                goto compile_mi;
             }
         }
 
@@ -412,6 +447,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
         jl_compile_workqueue(params, policy);
     }
     JL_GC_POP();
+    arraylist_free(&new_invokes);
 
     // process the globals array, before jl_merge_module destroys them
     SmallVector<std::string, 0> gvars(params.global_targets.size());
@@ -1164,7 +1200,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
         raw_svector_ostream OS(out.obj);
         legacy::PassManager emitter;
         addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+#if JL_LLVM_VERSION >= 180000
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::ObjectFile, false))
+#else
         if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false))
+#endif
             jl_safe_printf("ERROR: target does not support generation of object files\n");
         emitter.run(M);
         timers.obj.stopTimer();
@@ -1175,7 +1215,11 @@ static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimer
         raw_svector_ostream OS(out.asm_);
         legacy::PassManager emitter;
         addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+#if JL_LLVM_VERSION >= 180000
+        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::AssemblyFile, false))
+#else
         if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false))
+#endif
             jl_safe_printf("ERROR: target does not support generation of assembly files\n");
         emitter.run(M);
         timers.asm_.stopTimer();
@@ -1632,7 +1676,11 @@ void jl_dump_native_impl(void *native_code,
             jl_ExecutionEngine->getTargetOptions(),
             RelocModel,
             CMModel,
+#if JL_LLVM_VERSION >= 180000
+            CodeGenOptLevel::Aggressive // -O3 TODO: respect command -O0 flag?
+#else
             CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag?
+#endif
             ));
     fixupTM(*SourceTM);
     auto DL = jl_create_datalayout(*SourceTM);
@@ -1892,26 +1940,31 @@ void jl_dump_native_impl(void *native_code,
         JL_TIMING(NATIVE_AOT, NATIVE_Write);
 
         object::Archive::Kind Kind = getDefaultForHost(TheTriple);
+#if JL_LLVM_VERSION >= 180000
+#define WritingMode SymtabWritingMode::NormalSymtab
+#else
+#define WritingMode true
+#endif
 #define WRITE_ARCHIVE(fname, field, prefix, suffix) \
-        if (fname) {\
-            SmallVector<NewArchiveMember, 0> archive; \
-            SmallVector<std::string, 16> filenames; \
-            SmallVector<StringRef, 16> buffers; \
-            for (size_t i = 0; i < threads; i++) { \
-                filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \
-                buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \
-            } \
-            filenames.push_back("metadata" prefix suffix); \
-            buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \
-            if (z) { \
-                filenames.push_back("sysimg" prefix suffix); \
-                buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \
-            } \
-            for (size_t i = 0; i < filenames.size(); i++) { \
-                archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \
-            } \
-            handleAllErrors(writeArchive(fname, archive, true, Kind, true, false), reportWriterError); \
-        }
+    if (fname) {\
+        SmallVector<NewArchiveMember, 0> archive; \
+        SmallVector<std::string, 16> filenames; \
+        SmallVector<StringRef, 16> buffers; \
+        for (size_t i = 0; i < threads; i++) { \
+            filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \
+            buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \
+        } \
+        filenames.push_back("metadata" prefix suffix); \
+        buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \
+        if (z) { \
+            filenames.push_back("sysimg" prefix suffix); \
+            buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \
+        } \
+        for (size_t i = 0; i < filenames.size(); i++) { \
+            archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \
+        } \
+        handleAllErrors(writeArchive(fname, archive, WritingMode, Kind, true, false), reportWriterError); \
+    }
 
         WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc");
         WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc");
diff --git a/src/array.c b/src/array.c
index e7b2ee0e0b9b7..f0051ec17565a 100644
--- a/src/array.c
+++ b/src/array.c
@@ -304,30 +304,8 @@ JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
     jl_task_t *ct = jl_current_task;
     jl_value_t *s;
     jl_ptls_t ptls = ct->ptls;
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    if (sz <= GC_MAX_SZCLASS) {
-#ifndef MMTK_GC
-        int pool_id = jl_gc_szclass_align8(allocsz);
-        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
-        int osize = jl_gc_sizeclasses[pool_id];
-        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
-        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-        s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
-#else
-        s = jl_mmtk_gc_alloc_default(ptls, allocsz, 8, jl_string_type);
-#endif
-    }
-    else {
-        if (allocsz < sz) // overflow in adding offs, size was "negative"
-            jl_throw(jl_memory_exception);
-#ifndef MMTK_GC
-        s = jl_gc_big_alloc_noinline(ptls, allocsz);
-#else
-        s = jl_mmtk_gc_alloc_big(ptls, allocsz);
-#endif
-    }
+    s = (jl_value_t*)jl_gc_alloc(ptls, sz, jl_string_type);
     jl_set_typetagof(s, jl_string_tag, 0);
-    maybe_record_alloc_to_profile(s, len, jl_string_type);
     *(size_t*)s = len;
     jl_string_data(s)[len] = 0;
     return s;
diff --git a/src/ast.c b/src/ast.c
index 7c775bf25d486..ea1de429a946c 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -7,6 +7,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+
 #ifdef _OS_WINDOWS_
 #include <malloc.h>
 #endif
@@ -175,7 +176,8 @@ static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
     jl_sym_t *var = scmsym_to_julia(fl_ctx, args[0]);
     jl_binding_t *b = jl_get_module_binding(ctx->module, var, 0);
-    return (b != NULL && jl_atomic_load_relaxed(&b->owner) == b) ? fl_ctx->T : fl_ctx->F;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return (bpart != NULL && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL) ? fl_ctx->T : fl_ctx->F;
 }
 
 static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
@@ -204,15 +206,56 @@ static value_t fl_nothrow_julia_global(fl_context_t *fl_ctx, value_t *args, uint
         var = scmsym_to_julia(fl_ctx, args[1]);
     }
     jl_binding_t *b = jl_get_module_binding(mod, var, 0);
-    b = b ? jl_atomic_load_relaxed(&b->owner) : NULL;
-    return b != NULL && jl_atomic_load_relaxed(&b->value) != NULL ? fl_ctx->T : fl_ctx->F;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (!bpart)
+        return fl_ctx->F;
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return fl_ctx->F;
+    return  (jl_bkind_is_some_constant(decode_restriction_kind(pku)) ?
+        decode_restriction_value(pku) : jl_atomic_load_relaxed(&b->value)) != NULL ? fl_ctx->T : fl_ctx->F;
 }
 
-static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
+// Used to generate a unique suffix for a given symbol (e.g. variable or type name)
+// first argument contains a stack of method definitions seen so far by `closure-convert` in flisp.
+// if the top of the stack is non-NIL, we use it to augment the suffix so that it becomes
+// of the form $top_level_method_name##$counter, where `counter` is the smallest integer
+// such that the resulting name is not already defined in the current module's bindings.
+// If the top of the stack is NIL, we simply return the current module's counter.
+// This ensures that precompile statements are a bit more stable across different versions
+// of a codebase. see #53719
+static value_t fl_module_unique_name(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
+    argcount(fl_ctx, "julia-module-unique-name", nargs, 1);
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
-    assert(ctx->module);
-    return fixnum(jl_module_next_counter(ctx->module));
+    jl_module_t *m = ctx->module;
+    assert(m != NULL);
+    // Get the outermost function name from the `parsed_method_stack` top
+    char *funcname = NULL;
+    value_t parsed_method_stack = args[0];
+    if (parsed_method_stack != fl_ctx->NIL) {
+        value_t bottom_stack_symbol = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "last")), parsed_method_stack);
+        funcname = tosymbol(fl_ctx, bottom_stack_symbol, "julia-module-unique-name")->name;
+    }
+    size_t sz = funcname != NULL ? strlen(funcname) + 32 : 32; // 32 is enough for the suffix
+    char *buf = (char*)alloca(sz);
+    if (funcname != NULL && strchr(funcname, '#') == NULL) {
+        for (int i = 0; ; i++) {
+            snprintf(buf, sz, "%s##%d", funcname, i);
+            jl_sym_t *sym = jl_symbol(buf);
+            JL_LOCK(&m->lock);
+            if (jl_get_module_binding(m, sym, 0) == NULL) { // make sure this name is not already taken
+                jl_get_module_binding(m, sym, 1); // create the binding
+                JL_UNLOCK(&m->lock);
+                return symbol(fl_ctx, buf);
+            }
+            JL_UNLOCK(&m->lock);
+        }
+    }
+    else {
+        snprintf(buf, sz, "%d", jl_module_next_counter(m));
+    }
+    return symbol(fl_ctx, buf);
 }
 
 static int jl_is_number(jl_value_t *v)
@@ -245,7 +288,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
 static const builtinspec_t julia_flisp_ast_ext[] = {
     { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint
     { "nothrow-julia-global", fl_nothrow_julia_global },
-    { "current-julia-module-counter", fl_current_module_counter },
+    { "current-julia-module-counter", fl_module_unique_name },
     { "julia-scalar?", fl_julia_scalar },
     { NULL, NULL }
 };
diff --git a/src/builtin_proto.h b/src/builtin_proto.h
index 8b97c46df72da..7fbd555758675 100644
--- a/src/builtin_proto.h
+++ b/src/builtin_proto.h
@@ -69,6 +69,7 @@ DECLARE_BUILTIN(svec);
 DECLARE_BUILTIN(swapfield);
 DECLARE_BUILTIN(swapglobal);
 DECLARE_BUILTIN(throw);
+DECLARE_BUILTIN(throw_methoderror);
 DECLARE_BUILTIN(tuple);
 DECLARE_BUILTIN(typeassert);
 DECLARE_BUILTIN(typeof);
diff --git a/src/builtins.c b/src/builtins.c
index 07aad87c4556c..939aef4234ac9 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -591,6 +591,14 @@ JL_CALLABLE(jl_f_throw)
     return jl_nothing;
 }
 
+JL_CALLABLE(jl_f_throw_methoderror)
+{
+    JL_NARGSV(throw_methoderror, 1);
+    size_t world = jl_get_tls_world_age();
+    jl_method_error(args[0], &args[1], nargs, world);
+    return jl_nothing;
+}
+
 JL_CALLABLE(jl_f_ifelse)
 {
     JL_NARGS(ifelse, 3, 3);
@@ -1380,19 +1388,10 @@ JL_CALLABLE(jl_f_get_binding_type)
     jl_sym_t *var = (jl_sym_t*)args[1];
     JL_TYPECHK(get_binding_type, module, (jl_value_t*)mod);
     JL_TYPECHK(get_binding_type, symbol, (jl_value_t*)var);
-    jl_value_t *ty = jl_get_binding_type(mod, var);
-    if (ty == (jl_value_t*)jl_nothing) {
-        jl_binding_t *b = jl_get_module_binding(mod, var, 0);
-        if (b == NULL)
-            return (jl_value_t*)jl_any_type;
-        jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-        if (b2 != b)
-            return (jl_value_t*)jl_any_type;
-        jl_value_t *old_ty = NULL;
-        jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
-        return jl_atomic_load_relaxed(&b->ty);
-    }
-    return ty;
+    jl_value_t *ret = jl_get_binding_type(mod, var);
+    if (ret == jl_nothing)
+        return (jl_value_t*)jl_any_type;
+    return ret;
 }
 
 JL_CALLABLE(jl_f_swapglobal)
@@ -2095,6 +2094,12 @@ static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layou
         return references_name(((jl_uniontype_t*)p)->a, name, affects_layout, freevars) ||
                references_name(((jl_uniontype_t*)p)->b, name, affects_layout, freevars);
     }
+    if (jl_is_vararg(p)) {
+        jl_value_t *T = ((jl_vararg_t*)p)->T;
+        jl_value_t *N = ((jl_vararg_t*)p)->N;
+        return (T && references_name(T, name, affects_layout, freevars)) ||
+               (N && references_name(N, name, affects_layout, freevars));
+    }
     if (jl_is_typevar(p))
         return 0; // already checked by unionall, if applicable
     if (jl_is_datatype(p)) {
@@ -2211,6 +2216,9 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb)
     JL_GC_PUSH2(&a, &b);
     a = jl_rewrap_unionall((jl_value_t*)dta->super, dta->name->wrapper);
     b = jl_rewrap_unionall((jl_value_t*)dtb->super, dtb->name->wrapper);
+    // if tb recursively refers to itself in its supertype, assume that it refers to ta
+    // before checking whether the supertypes are equal
+    b = jl_substitute_datatype(b, dtb, dta);
     if (!jl_types_equal(a, b))
         goto no;
     JL_TRY {
@@ -2447,7 +2455,8 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin_func("finalizer", jl_f_finalizer);
     add_builtin_func("_compute_sparams", jl_f__compute_sparams);
     add_builtin_func("_svec_ref", jl_f__svec_ref);
-    add_builtin_func("current_scope", jl_f_current_scope);
+    jl_builtin_current_scope = add_builtin_func("current_scope", jl_f_current_scope);
+    add_builtin_func("throw_methoderror", jl_f_throw_methoderror);
 
     // builtin types
     add_builtin("Any", (jl_value_t*)jl_any_type);
@@ -2514,6 +2523,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("QuoteNode", (jl_value_t*)jl_quotenode_type);
     add_builtin("NewvarNode", (jl_value_t*)jl_newvarnode_type);
     add_builtin("Binding", (jl_value_t*)jl_binding_type);
+    add_builtin("BindingPartition", (jl_value_t*)jl_binding_partition_type);
     add_builtin("GlobalRef", (jl_value_t*)jl_globalref_type);
     add_builtin("NamedTuple", (jl_value_t*)jl_namedtuple_type);
 
diff --git a/src/ccall.cpp b/src/ccall.cpp
index 3c2857608c163..2de5be6906e7c 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -22,6 +22,8 @@ TRANSFORMED_CCALL_STAT(jl_cpu_wake);
 TRANSFORMED_CCALL_STAT(jl_gc_safepoint);
 TRANSFORMED_CCALL_STAT(jl_get_ptls_states);
 TRANSFORMED_CCALL_STAT(jl_threadid);
+TRANSFORMED_CCALL_STAT(jl_get_ptls_rng);
+TRANSFORMED_CCALL_STAT(jl_set_ptls_rng);
 TRANSFORMED_CCALL_STAT(jl_get_tls_world_age);
 TRANSFORMED_CCALL_STAT(jl_get_world_counter);
 TRANSFORMED_CCALL_STAT(jl_gc_enable_disable_finalizers_internal);
@@ -439,24 +441,13 @@ static Value *llvm_type_rewrite(
     // we need to use this alloca copy trick instead
     // On ARM and AArch64, the ABI requires casting through memory to different
     // sizes.
-    Value *from;
-    Value *to;
     const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
     Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type));
-    if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
-        to = emit_static_alloca(ctx, target_type);
-        setName(ctx.emission_context, to, "type_rewrite_buffer");
-        cast<AllocaInst>(to)->setAlignment(align);
-        from = to;
-    }
-    else {
-        from = emit_static_alloca(ctx, from_type);
-        setName(ctx.emission_context, from, "type_rewrite_buffer");
-        cast<AllocaInst>(from)->setAlignment(align);
-        to = from;
-    }
-    ctx.builder.CreateAlignedStore(v, from, align);
-    auto pun = ctx.builder.CreateAlignedLoad(target_type, to, align);
+    size_t nb = std::max(DL.getTypeAllocSize(target_type), DL.getTypeAllocSize(from_type));
+    AllocaInst *cast = emit_static_alloca(ctx, nb, align);
+    setName(ctx.emission_context, cast, "type_rewrite_buffer");
+    ctx.builder.CreateAlignedStore(v, cast, align);
+    auto pun = ctx.builder.CreateAlignedLoad(target_type, cast, align);
     setName(ctx.emission_context, pun, "type_rewrite");
     return pun;
 }
@@ -494,7 +485,7 @@ static const std::string make_errmsg(const char *fname, int n, const char *err)
     return msg.str();
 }
 
-static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn)
+static jl_cgval_t typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_value_t *jlto, jl_unionall_t *jlto_env, int argn)
 {
     if (jlto != (jl_value_t*)jl_any_type && !jl_subtype(jvinfo.typ, jlto)) {
         if (jlto == (jl_value_t*)jl_voidpointer_type) {
@@ -502,6 +493,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
             if (!jl_is_cpointer_type(jvinfo.typ)) {
                 // emit a typecheck, if not statically known to be correct
                 emit_cpointercheck(ctx, jvinfo, make_errmsg("ccall", argn + 1, ""));
+                return update_julia_type(ctx, jvinfo, (jl_value_t*)jl_pointer_type);
             }
         }
         else {
@@ -526,8 +518,10 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
                 ctx.builder.CreateUnreachable();
                 ctx.builder.SetInsertPoint(passBB);
             }
+            return update_julia_type(ctx, jvinfo, jlto);
         }
     }
+    return jvinfo;
 }
 
 // Emit code to convert argument to form expected by C ABI
@@ -537,7 +531,7 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
 static Value *julia_to_native(
         jl_codectx_t &ctx,
         Type *to, bool toboxed, jl_value_t *jlto, jl_unionall_t *jlto_env,
-        const jl_cgval_t &jvinfo,
+        jl_cgval_t jvinfo,
         bool byRef, int argn)
 {
     // We're passing Any
@@ -547,24 +541,16 @@ static Value *julia_to_native(
     }
     assert(jl_is_datatype(jlto) && jl_struct_try_layout((jl_datatype_t*)jlto));
 
-    typeassert_input(ctx, jvinfo, jlto, jlto_env, argn);
+    jvinfo = typeassert_input(ctx, jvinfo, jlto, jlto_env, argn);
     if (!byRef)
         return emit_unbox(ctx, to, jvinfo, jlto);
 
     // pass the address of an alloca'd thing, not a box
     // since those are immutable.
-    Value *slot = emit_static_alloca(ctx, to);
-    unsigned align = julia_alignment(jlto);
-    cast<AllocaInst>(slot)->setAlignment(Align(align));
+    Align align(julia_alignment(jlto));
+    Value *slot = emit_static_alloca(ctx, to, align);
     setName(ctx.emission_context, slot, "native_convert_buffer");
-    if (!jvinfo.ispointer()) {
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
-        ai.decorateInst(ctx.builder.CreateStore(emit_unbox(ctx, to, jvinfo, jlto), slot));
-    }
-    else {
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa);
-        emit_memcpy(ctx, slot, ai, jvinfo, jl_datatype_size(jlto), align, align);
-    }
+    emit_unbox_store(ctx, jvinfo, slot, ctx.tbaa().tbaa_stack, align);
     return slot;
 }
 
@@ -1671,9 +1657,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         emit_gc_safepoint(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const);
         return ghostValue(ctx, jl_nothing_type);
     }
-    else if (is_libjulia_func("jl_get_ptls_states")) {
+    else if (is_libjulia_func(jl_get_ptls_states)) {
         ++CCALL_STAT(jl_get_ptls_states);
-        assert(lrt == ctx.types().T_size);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, get_current_ptls(ctx), retboxed, rt, unionall, static_rt);
@@ -1693,6 +1678,36 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         ai.decorateInst(tid);
         return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
     }
+    else if (is_libjulia_func(jl_get_ptls_rng)) {
+        ++CCALL_STAT(jl_get_ptls_rng);
+        assert(lrt == getInt64Ty(ctx.builder.getContext()));
+        assert(!isVa && !llvmcall && nccallargs == 0);
+        JL_GC_POP();
+        Value *ptls_p = get_current_ptls(ctx);
+        const int rng_offset = offsetof(jl_tls_states_t, rngseed);
+        Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t)));
+        setName(ctx.emission_context, rng_ptr, "rngseed_ptr");
+        LoadInst *rng_value = ctx.builder.CreateAlignedLoad(getInt64Ty(ctx.builder.getContext()), rng_ptr, Align(sizeof(void*)));
+        setName(ctx.emission_context, rng_value, "rngseed");
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        ai.decorateInst(rng_value);
+        return mark_or_box_ccall_result(ctx, rng_value, retboxed, rt, unionall, static_rt);
+    }
+    else if (is_libjulia_func(jl_set_ptls_rng)) {
+        ++CCALL_STAT(jl_set_ptls_rng);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
+        assert(!isVa && !llvmcall && nccallargs == 1);
+        JL_GC_POP();
+        Value *ptls_p = get_current_ptls(ctx);
+        const int rng_offset = offsetof(jl_tls_states_t, rngseed);
+        Value *rng_ptr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptls_p, ConstantInt::get(ctx.types().T_size, rng_offset / sizeof(int8_t)));
+        setName(ctx.emission_context, rng_ptr, "rngseed_ptr");
+        assert(argv[0].V->getType() == getInt64Ty(ctx.builder.getContext()));
+        auto store = ctx.builder.CreateAlignedStore(argv[0].V, rng_ptr, Align(sizeof(void*)));
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        ai.decorateInst(store);
+        return ghostValue(ctx, jl_nothing_type);
+    }
     else if (is_libjulia_func(jl_get_tls_world_age)) {
         bool toplevel = !(ctx.linfo && jl_is_method(ctx.linfo->def.method));
         if (!toplevel) { // top level code does not see a stable world age during execution
@@ -1823,8 +1838,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         ctx.builder.SetInsertPoint(checkBB);
         auto signal_page_load = ctx.builder.CreateLoad(
                 ctx.types().T_size,
-                ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_size,
-                    get_current_signal_page_from_ptls(ctx.builder, ctx.types().T_size, get_current_ptls(ctx), ctx.tbaa().tbaa_const), -1),
+                emit_ptrgep(ctx, get_current_signal_page_from_ptls(ctx.builder, get_current_ptls(ctx), ctx.tbaa().tbaa_const),
+                    -sizeof(size_t)),
                 true);
         setName(ctx.emission_context, signal_page_load, "signal_page_load");
         ctx.builder.CreateBr(contBB);
@@ -1839,8 +1854,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
-        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1);
-        setName(ctx.emission_context, strp, "string_ptr");
+        auto strp = emit_ptrgep(ctx, obj, ctx.types().sizeof_ptr, "string_ptr");
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
@@ -1851,9 +1865,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         auto obj = emit_pointer_from_objref(ctx, boxed(ctx, argv[0])); // T_pprjlvalue
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
-        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(
-            ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*));
-        setName(ctx.emission_context, strp, "symbol_name");
+        auto strp = emit_ptrgep(ctx, obj, sizeof(jl_sym_t), "symbol_name");
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
@@ -1966,7 +1978,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
             // If the value is not boxed, try to compute the object id without
             // reboxing it.
             auto T_p_derived = PointerType::get(ctx.builder.getContext(), AddressSpace::Derived);
-            if (!val.isghost && !val.ispointer())
+            if (!val.isghost)
                 val = value_to_pointer(ctx, val);
             Value *args[] = {
                 emit_typeof(ctx, val, false, true),
@@ -2062,7 +2074,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     if (sret) {
         assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid");
         if (jl_is_pointerfree(rt)) {
-            result = emit_static_alloca(ctx, lrt);
+            result = emit_static_alloca(ctx, lrt, Align(julia_alignment(rt)));
             setName(ctx.emission_context, result, "ccall_sret");
             sretty = lrt;
             argvals[0] = result;
@@ -2108,7 +2120,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 if (!isa<Function>(llvmf) || cast<Function>(llvmf)->isIntrinsic() || cast<Function>(llvmf)->getFunctionType() != functype)
                     llvmf = NULL;
             }
-            else if (f_name.startswith("llvm.")) {
+            else if (f_name.starts_with("llvm.")) {
                 // compute and verify auto-mangling for intrinsic name
                 auto ID = Function::lookupIntrinsicID(f_name);
                 if (ID != Intrinsic::not_intrinsic) {
@@ -2230,7 +2242,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 Value *strct = emit_allocobj(ctx, (jl_datatype_t*)rt, true);
                 setName(ctx.emission_context, strct, "ccall_ret_box");
                 MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
-                int boxalign = julia_alignment(rt);
+                Align boxalign(julia_alignment(rt));
                 // copy the data from the return value to the new struct
                 const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
                 auto resultTy = result->getType();
@@ -2238,10 +2250,9 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 if (DL.getTypeStoreSize(resultTy) > rtsz) {
                     // ARM and AArch64 can use a LLVM type larger than the julia type.
                     // When this happens, cast through memory.
-                    auto slot = emit_static_alloca(ctx, resultTy);
+                    auto slot = emit_static_alloca(ctx, resultTy, boxalign);
                     setName(ctx.emission_context, slot, "type_pun_slot");
-                    slot->setAlignment(Align(boxalign));
-                    ctx.builder.CreateAlignedStore(result, slot, Align(boxalign));
+                    ctx.builder.CreateAlignedStore(result, slot, boxalign);
                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
                     emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign);
                 }
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index c78e6092ca5db..8557698a4e513 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -833,28 +833,6 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
         mapAddresses(Dyld, ro_alloc);
         mapAddresses(Dyld, exe_alloc);
     }
-#ifdef _OS_WINDOWS_
-    template <typename Alloc>
-    void *lookupWriteAddressFor(void *rt_addr, Alloc &&allocator)
-    {
-        for (auto &alloc: allocator->allocations) {
-            if (alloc.rt_addr == rt_addr) {
-                return alloc.wr_addr;
-            }
-        }
-        return nullptr;
-    }
-    void *lookupWriteAddressFor(void *rt_addr)
-    {
-        if (!ro_alloc)
-            return rt_addr;
-        if (void *ptr = lookupWriteAddressFor(rt_addr, ro_alloc))
-            return ptr;
-        if (void *ptr = lookupWriteAddressFor(rt_addr, exe_alloc))
-            return ptr;
-        return rt_addr;
-    }
-#endif // _OS_WINDOWS_
 };
 
 uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
@@ -947,13 +925,6 @@ void RTDyldMemoryManagerJL::deregisterEHFrames(uint8_t *Addr,
 
 }
 
-#ifdef _OS_WINDOWS_
-void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr)
-{
-    return ((RTDyldMemoryManagerJL*)memmgr)->lookupWriteAddressFor(rt_addr);
-}
-#endif
-
 RTDyldMemoryManager* createRTDyldMemoryManager()
 {
     return new RTDyldMemoryManagerJL();
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 613d7ae719448..4547e693755cd 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -130,14 +130,8 @@ static Value *stringConstPtr(
     }
     // Doesn't need to be aligned, we shouldn't operate on these like julia objects
     GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, Align(1), "_j_str_" + StringRef(ctxt.data(), ctxt.size()), *M);
-    Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0);
-    Value *Args[] = { zero, zero };
-    auto gep = irbuilder.CreateInBoundsGEP(gv->getValueType(),
-                                       // AddrSpaceCast in case globals are in non-0 AS
-                                       irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0)),
-                                       Args);
-    setName(emission_context, gep, "string_const_ptr");
-    return gep;
+    // AddrSpaceCast in case globals are in non-0 AS
+    return irbuilder.CreateAddrSpaceCast(gv, gv->getValueType()->getPointerTo(0));
 }
 
 
@@ -315,7 +309,7 @@ static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V)
 }
 
 static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt);
-static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile=false);
+static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value* dest, MDNode *tbaa_dest, Align alignment, bool isVolatile=false);
 
 static bool type_is_permalloc(jl_value_t *typ)
 {
@@ -329,6 +323,8 @@ static bool type_is_permalloc(jl_value_t *typ)
 }
 
 
+// find the offset of pointer fields which never need a write barrier since their type-analysis
+// shows they are permanently rooted
 static void find_perm_offsets(jl_datatype_t *typ, SmallVectorImpl<unsigned> &res, unsigned offset)
 {
     // This is a inlined field at `offset`.
@@ -352,14 +348,37 @@ static void find_perm_offsets(jl_datatype_t *typ, SmallVectorImpl<unsigned> &res
     }
 }
 
-static llvm::SmallVector<llvm::Value*, 0> get_gc_roots_for(jl_codectx_t &ctx, const jl_cgval_t &x)
+// load a pointer to N inlined_roots into registers (as a SmallVector)
+static llvm::SmallVector<Value*,0> load_gc_roots(jl_codectx_t &ctx, Value *inline_roots_ptr, size_t npointers, bool isVolatile=false)
+{
+    SmallVector<Value*,0> gcroots(npointers);
+    Type *T_prjlvalue = ctx.types().T_prjlvalue;
+    auto roots_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+    for (size_t i = 0; i < npointers; i++) {
+        auto *ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(jl_value_t*)), Align(sizeof(void*)), isVolatile);
+        roots_ai.decorateInst(ptr);
+        gcroots[i] = ptr;
+    }
+    return gcroots;
+}
+
+// inlined bool indicates whether this must return the inlined roots inside x separately, or whether x itself may be used as the root (if x is already isboxed)
+static llvm::SmallVector<Value*,0> get_gc_roots_for(jl_codectx_t &ctx, const jl_cgval_t &x, bool inlined=false)
 {
     if (x.constant || x.typ == jl_bottom_type)
         return {};
-    if (x.Vboxed) // superset of x.isboxed
+    if (!inlined && x.Vboxed) // superset of x.isboxed
         return {x.Vboxed};
-    assert(!x.isboxed);
-    if (x.ispointer()) {
+    assert(!x.isboxed || !inlined);
+    if (!x.inline_roots.empty()) {
+        // if (!inlined) { // TODO: implement this filter operation
+        //     SmallVector<unsigned,4> perm_offsets;
+        //     find_perm_offsets(typ, perm_offsets, 0);
+        //     return filter(!in(perm_offsets), x.inline_roots)
+        // }
+        return x.inline_roots;
+    }
+    if (!inlined && x.ispointer()) {
         assert(x.V);
         assert(x.V->getType()->getPointerAddressSpace() != AddressSpace::Tracked);
         return {x.V};
@@ -369,8 +388,7 @@ static llvm::SmallVector<llvm::Value*, 0> get_gc_roots_for(jl_codectx_t &ctx, co
         Type *T = julia_type_to_llvm(ctx, jltype);
         Value *agg = emit_unbox(ctx, T, x, jltype);
         SmallVector<unsigned,4> perm_offsets;
-        if (jltype && jl_is_datatype(jltype) && ((jl_datatype_t*)jltype)->layout)
-            find_perm_offsets((jl_datatype_t*)jltype, perm_offsets, 0);
+        find_perm_offsets((jl_datatype_t*)jltype, perm_offsets, 0);
         return ExtractTrackedValues(agg, agg->getType(), false, ctx.builder, perm_offsets);
     }
     // nothing here to root, move along
@@ -568,23 +586,6 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p)
     return load;
 }
 
-// Returns ctx.types().T_pjlvalue
-static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p)
-{
-    // emit a pointer to any jl_value_t which will be valid across reloading code
-    if (p == NULL)
-        return Constant::getNullValue(ctx.types().T_pjlvalue);
-    // bindings are prefixed with jl_bnd#
-    jl_globalref_t *gr = p->globalref;
-    Value *pgv = gr ? julia_pgv(ctx, "jl_bnd#", gr->name, gr->mod, p) : julia_pgv(ctx, "jl_bnd#", p);
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-    auto load = ai.decorateInst(maybe_mark_load_dereferenceable(
-            ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))),
-            false, sizeof(jl_binding_t), alignof(jl_binding_t)));
-    setName(ctx.emission_context, load, pgv->getName());
-    return load;
-}
-
 // bitcast a value, but preserve its address space when dealing with pointer types
 static Value *emit_bitcast(jl_codectx_t &ctx, Value *v, Type *jl_value)
 {
@@ -638,19 +639,13 @@ static unsigned convert_struct_offset(jl_codectx_t &ctx, Type *lty, unsigned byt
     return convert_struct_offset(ctx.builder.GetInsertBlock()->getModule()->getDataLayout(), lty, byte_offset);
 }
 
-static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigned byte_offset)
-{
-    unsigned idx = convert_struct_offset(ctx, lty, byte_offset);
-    return ctx.builder.CreateConstInBoundsGEP2_32(lty, base, 0, idx);
-}
-
 static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false);
 
 static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed)
 {
     // this function converts a Julia Type into the equivalent LLVM type
     if (isboxed) *isboxed = false;
-    if (jt == (jl_value_t*)jl_bottom_type)
+    if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super)
         return getVoidTy(ctxt);
     if (jl_is_concrete_immutable(jt)) {
         if (jl_datatype_nbits(jt) == 0)
@@ -760,7 +755,7 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt,
     // use this where C-compatible (unboxed) structs are desired
     // use julia_type_to_llvm directly when you want to preserve Julia's type semantics
     if (isboxed) *isboxed = false;
-    if (jt == (jl_value_t*)jl_bottom_type)
+    if (jt == (jl_value_t*)jl_bottom_type || jt == (jl_value_t*)jl_typeofbottom_type || jt == (jl_value_t*)jl_typeofbottom_type->super)
         return getVoidTy(ctxt);
     if (jl_is_primitivetype(jt))
         return bitstype_to_llvm(jt, ctxt, llvmcall);
@@ -948,6 +943,9 @@ static bool for_each_uniontype_small(
         allunbox &= for_each_uniontype_small(f, ((jl_uniontype_t*)ty)->b, counter);
         return allunbox;
     }
+    else if (ty == (jl_value_t*)jl_typeofbottom_type->super) {
+        f(++counter, jl_typeofbottom_type); // treat Tuple{union{}} as identical to typeof(Union{})
+    }
     else if (jl_is_pointerfree(ty)) {
         f(++counter, (jl_datatype_t*)ty);
         return true;
@@ -1006,11 +1004,10 @@ static Value *data_pointer(jl_codectx_t &ctx, const jl_cgval_t &x)
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                             jl_aliasinfo_t const &src_ai, uint64_t sz, unsigned align_dst, unsigned align_src, bool is_volatile)
+                             jl_aliasinfo_t const &src_ai, uint64_t sz, Align align_dst, Align align_src, bool is_volatile)
 {
     if (sz == 0)
         return;
-    assert(align_dst && "align must be specified");
     #if JL_LLVM_VERSION < 170000
     // If the types are small and simple, use load and store directly.
     // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int
@@ -1053,7 +1050,7 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
             if (isa<Instruction>(dst) && !dst->hasName())
                 setName(ctx.emission_context, dst, "memcpy_refined_dst");
             auto val = src_ai.decorateInst(ctx.builder.CreateAlignedLoad(directel, src, MaybeAlign(align_src), is_volatile));
-            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, Align(align_dst), is_volatile));
+            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(val, dst, align_dst, is_volatile));
             ++SkippedMemcpys;
             return;
         }
@@ -1072,12 +1069,12 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     // above problem won't be as serious.
 
     auto merged_ai = dst_ai.merge(src_ai);
-    ctx.builder.CreateMemCpy(dst, Align(align_dst), src, Align(align_src), sz, is_volatile,
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                             jl_aliasinfo_t const &src_ai, Value *sz, unsigned align_dst, unsigned align_src, bool is_volatile)
+                             jl_aliasinfo_t const &src_ai, Value *sz, Align align_dst, Align align_src, bool is_volatile)
 {
     if (auto const_sz = dyn_cast<ConstantInt>(sz)) {
         emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, const_sz->getZExtValue(), align_dst, align_src, is_volatile);
@@ -1086,25 +1083,266 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const
     ++EmittedMemcpys;
 
     auto merged_ai = dst_ai.merge(src_ai);
-    ctx.builder.CreateMemCpy(dst, MaybeAlign(align_dst), src, MaybeAlign(align_src), sz, is_volatile,
+    ctx.builder.CreateMemCpy(dst, align_dst, src, align_src, sz, is_volatile,
                              merged_ai.tbaa, merged_ai.tbaa_struct, merged_ai.scope, merged_ai.noalias);
 }
 
 template<typename T1>
 static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, Value *src,
-                        jl_aliasinfo_t const &src_ai, T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false)
+                        jl_aliasinfo_t const &src_ai, T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false)
 {
     emit_memcpy_llvm(ctx, dst, dst_ai, src, src_ai, sz, align_dst, align_src, is_volatile);
 }
 
 template<typename T1>
 static void emit_memcpy(jl_codectx_t &ctx, Value *dst, jl_aliasinfo_t const &dst_ai, const jl_cgval_t &src,
-                        T1 &&sz, unsigned align_dst, unsigned align_src, bool is_volatile=false)
+                        T1 &&sz, Align align_dst, Align align_src, bool is_volatile=false)
 {
     auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, src.tbaa);
     emit_memcpy_llvm(ctx, dst, dst_ai, data_pointer(ctx, src), src_ai, sz, align_dst, align_src, is_volatile);
 }
 
+static bool allpointers(jl_datatype_t *typ)
+{
+    return jl_datatype_size(typ) == typ->layout->npointers * sizeof(void*);
+}
+
+// compute the space required by split_value_into, by simulating it
+// returns (sizeof(split_value), n_pointers)
+static std::pair<size_t,size_t> split_value_size(jl_datatype_t *typ)
+{
+    assert(jl_is_datatype(typ));
+    size_t dst_off = 0;
+    bool hasptr = typ->layout->first_ptr >= 0;
+    size_t npointers = hasptr ? typ->layout->npointers : 0;
+    // drop the data pointer if the entire structure is just pointers
+    // TODO: eventually we could drop the slots for the pointers from inside the
+    //       types to pack it together, but this can change the alignment of the bits
+    //       in the fields inside, even if those bits have no pointers themselves. So
+    //       we would actually need to compute, for each pointer, whether any
+    //       subsequent field needed the extra alignment (for example, we can
+    //       drop space for any runs of two/four pointer).  Some of these
+    //       functions are already written in a way to support that, but not
+    //       fully implemented yet.
+    bool nodata = allpointers(typ);
+    if (nodata)
+        dst_off = 0;
+    else
+        dst_off = jl_datatype_size(typ);
+    return std::make_pair(dst_off, npointers);
+}
+
+// take a value `x` and split its bits into dst and the roots into inline_roots
+static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, Value *inline_roots_ptr, jl_aliasinfo_t const &roots_ai, bool isVolatileStore=false)
+{
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    assert(jl_is_concrete_type(x.typ));
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    Type *T_prjlvalue = ctx.types().T_prjlvalue;
+    if (!x.inline_roots.empty()) {
+        auto sizes = split_value_size(typ);
+        if (sizes.first > 0)
+            emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src, isVolatileStore);
+        for (size_t i = 0; i < sizes.second; i++) {
+            Value *unbox = x.inline_roots[i];
+            roots_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore));
+        }
+        return;
+    }
+    if (inline_roots_ptr == nullptr) {
+        emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_dst, isVolatileStore);
+        return;
+    }
+    Value *src = data_pointer(ctx, value_to_pointer(ctx, x));
+    bool isstack = isa<AllocaInst>(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack;
+    size_t dst_off = 0;
+    size_t src_off = 0;
+    bool hasptr = typ->layout->first_ptr >= 0;
+    size_t npointers = hasptr ? typ->layout->npointers : 0;
+    bool nodata = allpointers(typ);
+    for (size_t i = 0; true; i++) {
+        bool last = i == npointers;
+        size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*));
+        if (ptr > src_off) {
+            emit_memcpy(ctx,
+                emit_ptrgep(ctx, dst, dst_off),
+                dst_ai,
+                emit_ptrgep(ctx, src, src_off),
+                src_ai,
+                ptr - src_off,
+                align_dst,
+                align_src,
+                isVolatileStore);
+            dst_off += ptr - src_off;
+        }
+        if (last)
+            break;
+        auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*)));
+        if (!isstack)
+            load->setOrdering(AtomicOrdering::Unordered);
+        src_ai.decorateInst(load);
+        roots_ai.decorateInst(ctx.builder.CreateAlignedStore(load, emit_ptrgep(ctx, inline_roots_ptr, i * sizeof(void*)), Align(sizeof(void*)), isVolatileStore));
+        align_src = align_dst = Align(sizeof(void*));
+        src_off = ptr + sizeof(void*);
+        if (!nodata) {
+            // store an undef pointer here, to make sure nobody looks at this
+            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(
+                ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1),
+                emit_ptrgep(ctx, dst, dst_off),
+                align_src,
+                isVolatileStore));
+            dst_off += sizeof(void*);
+            assert(dst_off == src_off);
+        }
+    }
+}
+
+static void split_value_into(jl_codectx_t &ctx, const jl_cgval_t &x, Align align_src, Value *dst, Align align_dst, jl_aliasinfo_t const &dst_ai, MutableArrayRef<Value*> inline_roots)
+{
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    assert(jl_is_concrete_type(x.typ));
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    Type *T_prjlvalue = ctx.types().T_prjlvalue;
+    if (!x.inline_roots.empty()) {
+        auto sizes = split_value_size(typ);
+        if (sizes.first > 0)
+            emit_memcpy(ctx, dst, dst_ai, x.V, src_ai, sizes.first, align_dst, align_src);
+        for (size_t i = 0; i < sizes.second; i++)
+            inline_roots[i] = x.inline_roots[i];
+        return;
+    }
+    if (inline_roots.empty()) {
+        emit_unbox_store(ctx, x, dst, ctx.tbaa().tbaa_stack, align_dst);
+        return;
+    }
+    Value *src = data_pointer(ctx, value_to_pointer(ctx, x));
+    bool isstack = isa<AllocaInst>(src->stripInBoundsOffsets()) || src_ai.tbaa == ctx.tbaa().tbaa_stack;
+    size_t dst_off = 0;
+    size_t src_off = 0;
+    bool hasptr = typ->layout->first_ptr >= 0;
+    size_t npointers = hasptr ? typ->layout->npointers : 0;
+    bool nodata = allpointers(typ);
+    for (size_t i = 0; true; i++) {
+        bool last = i == npointers;
+        size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*));
+        if (ptr > src_off) {
+            emit_memcpy(ctx,
+                emit_ptrgep(ctx, dst, dst_off),
+                dst_ai,
+                emit_ptrgep(ctx, src, src_off),
+                src_ai,
+                ptr - src_off,
+                align_dst,
+                align_src);
+            dst_off += ptr - src_off;
+        }
+        if (last)
+            break;
+        auto *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_ptrgep(ctx, src, ptr), Align(sizeof(void*)));
+        if (!isstack)
+            load->setOrdering(AtomicOrdering::Unordered);
+        src_ai.decorateInst(load);
+        inline_roots[i] = load;
+        align_src = align_dst = Align(sizeof(void*));
+        src_off = ptr + sizeof(void*);
+        if (!nodata) {
+            // store an undef pointer here, to make sure nobody looks at this
+            dst_ai.decorateInst(ctx.builder.CreateAlignedStore(
+                ctx.builder.getIntN(sizeof(void*) * 8, (uint64_t)-1),
+                emit_ptrgep(ctx, dst, dst_off),
+                align_src));
+            dst_off += sizeof(void*);
+            assert(dst_off == src_off);
+        }
+    }
+}
+
+static std::pair<AllocaInst*, SmallVector<Value*,0>> split_value(jl_codectx_t &ctx, const jl_cgval_t &x, Align x_alignment)
+{
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    auto sizes = split_value_size(typ);
+    Align align_dst(julia_alignment((jl_value_t*)typ));
+    AllocaInst *bits = sizes.first > 0 ? emit_static_alloca(ctx, sizes.first, align_dst) : nullptr;
+    SmallVector<Value*,0> roots(sizes.second);
+    auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+    split_value_into(ctx, x, x_alignment, bits, align_dst, stack_ai, MutableArrayRef(roots));
+    return std::make_pair(bits, roots);
+}
+
+// Return the offset values corresponding to jl_field_offset, but into the two buffers for a split value (or -1)
+static std::pair<ssize_t,ssize_t> split_value_field(jl_datatype_t *typ, unsigned idx)
+{
+    size_t fldoff = jl_field_offset(typ, idx);
+    size_t src_off = 0;
+    size_t dst_off = 0;
+    assert(typ->layout->first_ptr >= 0);
+    size_t npointers = typ->layout->npointers;
+    bool nodata = allpointers(typ);
+    for (size_t i = 0; i < npointers; i++) {
+        size_t ptr = jl_ptr_offset(typ, i) * sizeof(void*);
+        if (ptr >= fldoff) {
+            if (ptr >= fldoff + jl_field_size(typ, idx))
+                break;
+            bool onlyptr = jl_field_isptr(typ, idx) || allpointers((jl_datatype_t*)jl_field_type(typ, idx));
+            return std::make_pair(onlyptr ? -1 : dst_off + fldoff - src_off, i);
+        }
+        dst_off += ptr - src_off;
+        src_off = ptr + sizeof(void*);
+        if (!nodata) {
+            assert(dst_off + sizeof(void*) == src_off);
+            dst_off = src_off;
+        }
+    }
+    return std::make_pair(dst_off + fldoff - src_off, -1);
+}
+
+// Copy `x` to `dst`, where `x` was a split value and dst needs to have a native layout, copying any inlined roots back into their native location.
+// This does not respect roots, so you must call emit_write_multibarrier afterwards.
+static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatileStore)
+{
+    jl_datatype_t *typ = (jl_datatype_t*)x.typ;
+    assert(jl_is_concrete_type(x.typ));
+    assert(typ->layout->first_ptr >= 0 && !x.inline_roots.empty());
+    Align align_dst = alignment;
+    Align align_src(julia_alignment(x.typ));
+    Value *src = x.V;
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    size_t dst_off = 0;
+    size_t src_off = 0;
+    size_t npointers = typ->layout->npointers;
+    bool nodata = allpointers(typ);
+    bool isstack = isa<AllocaInst>(dst->stripInBoundsOffsets()) || dst_ai.tbaa == ctx.tbaa().tbaa_stack;
+    for (size_t i = 0; true; i++) {
+        bool last = i == npointers;
+        size_t ptr = last ? jl_datatype_size(typ) : (jl_ptr_offset(typ, i) * sizeof(void*));
+        if (ptr > dst_off) {
+            emit_memcpy(ctx,
+                emit_ptrgep(ctx, dst, dst_off),
+                dst_ai,
+                emit_ptrgep(ctx, src, src_off),
+                src_ai,
+                ptr - dst_off,
+                align_dst,
+                align_src,
+                isVolatileStore);
+            src_off += ptr - dst_off;
+        }
+        if (last)
+            break;
+        auto *root = x.inline_roots[i];
+        auto *store = ctx.builder.CreateAlignedStore(root, emit_ptrgep(ctx, dst, ptr), Align(sizeof(void*)), isVolatileStore);
+        if (!isstack)
+            store->setOrdering(AtomicOrdering::Unordered);
+        dst_ai.decorateInst(store);
+        align_dst = align_src = Align(sizeof(void*));
+        dst_off = ptr + sizeof(void*);
+        if (!nodata) {
+            assert(src_off + sizeof(void*) == dst_off);
+            src_off = dst_off;
+        }
+    }
+}
+
 static Value *emit_tagfrom(jl_codectx_t &ctx, jl_datatype_t *dt)
 {
     if (dt->smalltag)
@@ -1215,10 +1453,10 @@ static Value *emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p, bool maybenull
 static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt)
 {
     Value *Ptr = decay_derived(ctx, dt);
-    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, types) / sizeof(void*));
+    unsigned Idx = offsetof(jl_datatype_t, types);
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto types = ai.decorateInst(ctx.builder.CreateAlignedLoad(
-                ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*))));
+                ctx.types().T_pjlvalue, emit_ptrgep(ctx, Ptr, Idx), Align(sizeof(void*))));
     setName(ctx.emission_context, types, "datatype_types");
     return types;
 }
@@ -1237,16 +1475,13 @@ static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt, bool add_isunion=
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     Value *Ptr = decay_derived(ctx, dt);
-    Value *Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_t, layout) / sizeof(int32_t*));
-    Ptr = ctx.builder.CreateInBoundsGEP(getPointerTy(ctx.builder.getContext()), Ptr, Idx);
+    Ptr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_t, layout));
     Ptr = ai.decorateInst(ctx.builder.CreateAlignedLoad(getPointerTy(ctx.builder.getContext()), Ptr, Align(sizeof(int32_t*))));
-    Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, size) / sizeof(int32_t));
-    Value *SizePtr = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx);
+    Value *SizePtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, size));
     Value *Size = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), SizePtr, Align(sizeof(int32_t))));
     setName(ctx.emission_context, Size, "datatype_size");
     if (add_isunion) {
-        Idx = ConstantInt::get(ctx.types().T_size, offsetof(jl_datatype_layout_t, flags) / sizeof(int8_t));
-        Value *FlagPtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Ptr, Idx);
+        Value *FlagPtr = emit_ptrgep(ctx, Ptr, offsetof(jl_datatype_layout_t, flags));
         Value *Flag = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), FlagPtr, Align(sizeof(int16_t))));
         Flag = ctx.builder.CreateLShr(Flag, 4);
         Flag = ctx.builder.CreateAnd(Flag, ConstantInt::get(Flag->getType(), 1));
@@ -1323,7 +1558,7 @@ static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt)
 static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ)
 {
     Value *isprimitive;
-    isprimitive = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
+    isprimitive = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     isprimitive = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isprimitive, Align(1)));
     isprimitive = ctx.builder.CreateLShr(isprimitive, 7);
@@ -1335,10 +1570,7 @@ static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *typ)
 static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
 {
     unsigned n = offsetof(jl_datatype_t, name) / sizeof(char*);
-    Value *vptr = ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_pjlvalue,
-            maybe_decay_tracked(ctx, dt),
-            ConstantInt::get(ctx.types().T_size, n));
+    Value *vptr = emit_ptrgep(ctx, maybe_decay_tracked(ctx, dt), n * sizeof(jl_value_t*));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     auto name = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, vptr, Align(sizeof(void*))));
     setName(ctx.emission_context, name, "datatype_name");
@@ -1454,15 +1686,23 @@ static void null_load_check(jl_codectx_t &ctx, Value *v, jl_module_t *scope, jl_
 }
 
 template<typename Func>
-static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func)
+static void emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, MutableArrayRef<Value*> defval, Func &&func)
 {
-    if (!ifnot) {
-        return func();
+    if (ifnot == nullptr) {
+        auto res = func();
+        assert(res.size() == defval.size());
+        for (size_t i = 0; i < defval.size(); i++)
+            defval[i] = res[i];
+        return;
     }
     if (auto Cond = dyn_cast<ConstantInt>(ifnot)) {
         if (Cond->isZero())
-            return defval;
-        return func();
+            return;
+        auto res = func();
+        assert(res.size() == defval.size());
+        for (size_t i = 0; i < defval.size(); i++)
+            defval[i] = res[i];
+        return;
     }
     ++EmittedGuards;
     BasicBlock *currBB = ctx.builder.GetInsertBlock();
@@ -1471,16 +1711,33 @@ static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval,
     ctx.builder.CreateCondBr(ifnot, passBB, exitBB);
     ctx.builder.SetInsertPoint(passBB);
     auto res = func();
+    assert(res.size() == defval.size());
     passBB = ctx.builder.GetInsertBlock();
     ctx.builder.CreateBr(exitBB);
     ctx.builder.SetInsertPoint(exitBB);
-    if (defval == nullptr)
+    for (size_t i = 0; i < defval.size(); i++) {
+        PHINode *phi = ctx.builder.CreatePHI(defval[i]->getType(), 2);
+        phi->addIncoming(defval[i], currBB);
+        phi->addIncoming(res[i], passBB);
+        setName(ctx.emission_context, phi, "guard_res");
+        defval[i] = phi;
+    }
+}
+
+template<typename Func>
+static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func)
+{
+    MutableArrayRef res(&defval, defval == nullptr ? 0 : 1);
+    auto funcwrap = [&func] () -> SmallVector<Value*,1> {
+        auto res = func();
+        if (res == nullptr)
+            return {};
+        return {res};
+    };
+    emit_guarded_test(ctx, ifnot, res, funcwrap);
+    if (res.empty())
         return nullptr;
-    PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2);
-    phi->addIncoming(defval, currBB);
-    phi->addIncoming(res, passBB);
-    setName(ctx.emission_context, phi, "guard_res");
-    return phi;
+    return res[0];
 }
 
 template<typename Func>
@@ -1537,7 +1794,7 @@ static Value *emit_typeof(jl_codectx_t &ctx, Value *v, bool maybenull, bool just
             // we lied a bit: this wasn't really an object (though it was valid for GC rooting)
             // and we need to use it as an index to get the real object now
             Module *M = jl_Module;
-            Value *smallp = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), prepare_global_in(M, jl_small_typeof_var), tag);
+            Value *smallp = emit_ptrgep(ctx, prepare_global_in(M, jl_small_typeof_var), tag);
             jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
             auto small = ctx.builder.CreateAlignedLoad(typetag->getType(), smallp, M->getDataLayout().getPointerABIAlignment(0));
             small->setMetadata(LLVMContext::MD_nonnull, MDNode::get(M->getContext(), None));
@@ -1692,6 +1949,8 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
         if (intersected_type == (jl_value_t*)jl_bottom_type)
             known_isa = false;
     }
+    if (intersected_type == (jl_value_t*)jl_typeofbottom_type->super)
+        intersected_type = (jl_value_t*)jl_typeofbottom_type; // swap abstract Type{Union{}} for concrete typeof(Union{})
     if (known_isa) {
         if (!*known_isa && !msg.isTriviallyEmpty()) {
             emit_type_error(ctx, x, literal_pointer_val(ctx, type), msg);
@@ -1786,7 +2045,7 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
 // declare that the pointer is legal (for zero bytes) even though it might be undef.
 static Value *emit_isa_and_defined(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ)
 {
-    return emit_nullcheck_guard(ctx, val.ispointer() ? val.V : nullptr, [&] {
+    return emit_nullcheck_guard(ctx, val.inline_roots.empty() && val.ispointer() ? val.V : nullptr, [&] {
         return emit_isa(ctx, val, typ, Twine()).first;
     });
 }
@@ -1815,7 +2074,7 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
 static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ)
 {
     Value *isconcrete;
-    isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
+    isconcrete = emit_ptrgep(ctx, decay_derived(ctx, typ), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     isconcrete = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1)));
     isconcrete = ctx.builder.CreateLShr(isconcrete, 1);
@@ -1869,6 +2128,9 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
             if (ainfo.isghost) {
                 a = Constant::getNullValue(getPointerTy(ctx.builder.getContext()));
             }
+            else if (!ainfo.inline_roots.empty()) {
+                a = value_to_pointer(ctx, ainfo).V;
+            }
             else if (!ainfo.ispointer()) {
                 // CreateAlloca is OK here since we are on an error branch
                 Value *tempSpace = ctx.builder.CreateAlloca(a->getType());
@@ -1900,6 +2162,7 @@ static Value *CreateSimplifiedExtractValue(jl_codectx_t &ctx, Value *Agg, ArrayR
 static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef<Value*>);
 static void emit_write_barrier(jl_codectx_t&, Value*, Value*);
 static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*);
+static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x);
 
 SmallVector<unsigned, 0> first_ptr(Type *T)
 {
@@ -1961,85 +2224,81 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
                              bool maybe_null_if_boxed = true, unsigned alignment = 0,
                              Value **nullcheck = nullptr)
 {
-    // TODO: we should use unordered loads for anything with CountTrackedPointers(elty).count > 0 (if not otherwise locked)
     Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype);
     if (type_is_ghost(elty)) {
         if (isStrongerThanMonotonic(Order))
             ctx.builder.CreateFence(Order);
         return ghostValue(ctx, jltype);
     }
+    if (isboxed)
+        alignment = sizeof(void*);
+    else if (!alignment)
+        alignment = julia_alignment(jltype);
+    if (idx_0based)
+        ptr = ctx.builder.CreateInBoundsGEP(elty, ptr, idx_0based);
     unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
     // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type
     AllocaInst *intcast = NULL;
-    if (Order == AtomicOrdering::NotAtomic) {
-        if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) {
-            intcast = emit_static_alloca(ctx, elty);
-            setName(ctx.emission_context, intcast, "aggregate_load_box");
+    if (Order == AtomicOrdering::NotAtomic && !isboxed && !aliasscope && elty->isAggregateType() && !jl_is_genericmemoryref_type(jltype)) {
+        // use split_value to do this load
+        auto src = mark_julia_slot(ptr, jltype, NULL, tbaa);
+        auto copy = split_value(ctx, src, Align(alignment));
+        if (maybe_null_if_boxed && !copy.second.empty()) {
+            null_pointer_check(ctx, copy.second[0], nullcheck);
         }
+        return mark_julia_slot(copy.first, jltype, NULL, ctx.tbaa().tbaa_stack, copy.second);
     }
-    else {
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic) {
         if (!isboxed && !elty->isIntOrPtrTy()) {
-            intcast = emit_static_alloca(ctx, elty);
+            intcast = emit_static_alloca(ctx, elty, Align(alignment));
             setName(ctx.emission_context, intcast, "atomic_load_box");
-            elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
+            realelty = elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
+        }
+        if (isa<IntegerType>(elty)) {
+            unsigned nb2 = PowerOf2Ceil(nb);
+            if (nb != nb2)
+                elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
         }
     }
-    Type *realelty = elty;
-    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
-        unsigned nb2 = PowerOf2Ceil(nb);
-        if (nb != nb2)
-            elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
-    }
-    Value *data = ptr;
-    if (idx_0based)
-        data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based);
     Value *instr = nullptr;
-    if (isboxed)
-        alignment = sizeof(void*);
-    else if (!alignment)
-        alignment = julia_alignment(jltype);
-    if (intcast && Order == AtomicOrdering::NotAtomic) {
-        emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, alignment, intcast->getAlign().value());
+    if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
+        // load these FCA as individual fields, so LLVM does not need to split them later
+        Value *fld0 = ctx.builder.CreateStructGEP(elty, ptr, 0);
+        LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false);
+        load0->setOrdering(Order);
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+        ai.scope = MDNode::concatenate(aliasscope, ai.scope);
+        ai.decorateInst(load0);
+        Value *fld1 = ctx.builder.CreateStructGEP(elty, ptr, 1);
+        LoadInst *load1 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(1), fld1, Align(alignment), false);
+        static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order");
+        maybe_mark_load_dereferenceable(load1, true, sizeof(void*)*2, alignof(void*));
+        load1->setOrdering(Order);
+        ai.decorateInst(load1);
+        instr = Constant::getNullValue(elty);
+        instr = ctx.builder.CreateInsertValue(instr, load0, 0);
+        instr = ctx.builder.CreateInsertValue(instr, load1, 1);
     }
     else {
-        if (!isboxed && jl_is_genericmemoryref_type(jltype)) {
-            // load these FCA as individual fields, so LLVM does not need to split them later
-            Value *fld0 = ctx.builder.CreateStructGEP(elty, data, 0);
-            LoadInst *load0 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(0), fld0, Align(alignment), false);
-            load0->setOrdering(Order);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-            ai.scope = MDNode::concatenate(aliasscope, ai.scope);
-            ai.decorateInst(load0);
-            Value *fld1 = ctx.builder.CreateStructGEP(elty, data, 1);
-            LoadInst *load1 = ctx.builder.CreateAlignedLoad(elty->getStructElementType(1), fld1, Align(alignment), false);
-            static_assert(offsetof(jl_genericmemoryref_t, ptr_or_offset) == 0, "wrong field order");
-            maybe_mark_load_dereferenceable(load1, true, sizeof(void*)*2, alignof(void*));
-            load1->setOrdering(Order);
-            ai.decorateInst(load1);
-            instr = Constant::getNullValue(elty);
-            instr = ctx.builder.CreateInsertValue(instr, load0, 0);
-            instr = ctx.builder.CreateInsertValue(instr, load1, 1);
-        }
-        else {
-            LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false);
-            load->setOrdering(Order);
-            if (isboxed)
-                maybe_mark_load_dereferenceable(load, true, jltype);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-            ai.scope = MDNode::concatenate(aliasscope, ai.scope);
-            ai.decorateInst(load);
-            instr = load;
-        }
-        if (elty != realelty)
-            instr = ctx.builder.CreateTrunc(instr, realelty);
-        if (intcast) {
-            ctx.builder.CreateStore(instr, intcast);
-            instr = nullptr;
-        }
+        LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment), false);
+        load->setOrdering(Order);
+        if (isboxed)
+            maybe_mark_load_dereferenceable(load, true, jltype);
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+        ai.scope = MDNode::concatenate(aliasscope, ai.scope);
+        ai.decorateInst(load);
+        instr = load;
+    }
+    if (elty != realelty)
+        instr = ctx.builder.CreateTrunc(instr, realelty);
+    if (intcast) {
+        ctx.builder.CreateAlignedStore(instr, intcast, Align(alignment));
+        instr = nullptr;
     }
     if (maybe_null_if_boxed) {
         if (intcast)
-            instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+            instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment));
         Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
         if (first_ptr)
             null_pointer_check(ctx, first_ptr, nullcheck);
@@ -2052,7 +2311,7 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) }));
         if (intcast)
-            instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+            instr = ctx.builder.CreateAlignedLoad(intcast->getAllocatedType(), intcast, Align(alignment));
         instr = ctx.builder.CreateTrunc(instr, getInt1Ty(ctx.builder.getContext()));
     }
     if (instr)
@@ -2077,6 +2336,12 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
         }
         else {
+            if (trim_may_error(ctx.params->trim)) {
+                // if we know the return type, we can assume the result is of that type
+                errs() << "ERROR: Dynamic call to setfield/modifyfield\n";
+                errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+                print_stacktrace(ctx, ctx.params->trim);
+            }
             Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call);
             ret = mark_julia_type(ctx, callval, true, jl_any_type);
         }
@@ -2084,6 +2349,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         ret = update_julia_type(ctx, ret, jltype);
         return ret;
     };
+    if (isboxed)
+        alignment = sizeof(void*);
+    else if (!alignment)
+        alignment = julia_alignment(jltype);
     Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype);
     if (type_is_ghost(elty) ||
             (issetfieldonce && !maybe_null_if_boxed) ||
@@ -2120,12 +2389,15 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         FailOrder = AtomicOrdering::Monotonic;
     unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype);
     AllocaInst *intcast = nullptr;
+    Type *intcast_eltyp = nullptr;
+    bool tracked_pointers = isboxed || CountTrackedPointers(elty).count > 0;
     if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
+        intcast_eltyp = elty;
+        elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
         if (!issetfield) {
-            intcast = emit_static_alloca(ctx, elty);
+            intcast = emit_static_alloca(ctx, elty, Align(alignment));
             setName(ctx.emission_context, intcast, "atomic_store_box");
         }
-        elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb);
     }
     Type *realelty = elty;
     if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
@@ -2134,19 +2406,21 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb2);
     }
     Value *r = nullptr;
-    if (issetfield || isswapfield || isreplacefield || issetfieldonce)  {
-        if (isboxed)
+    if (issetfield || isswapfield || isreplacefield || issetfieldonce)  { // e.g. !ismodifyfield
+        assert(isboxed || rhs.typ == jltype);
+        if (isboxed) {
             r = boxed(ctx, rhs);
-        else if (aliasscope || Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
+        }
+        else if (intcast) {
+            emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
+            r = ctx.builder.CreateLoad(realelty, intcast);
+        }
+        else if (aliasscope || Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) {
             r = emit_unbox(ctx, realelty, rhs, jltype);
-            if (realelty != elty)
-                r = ctx.builder.CreateZExt(r, elty);
         }
+        if (realelty != elty)
+            r = ctx.builder.CreateZExt(r, elty);
     }
-    if (isboxed)
-        alignment = sizeof(void*);
-    else if (!alignment)
-        alignment = julia_alignment(jltype);
     Value *instr = nullptr;
     Value *Compare = nullptr;
     Value *Success = nullptr;
@@ -2176,7 +2450,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         }
         else {
             assert(Order == AtomicOrdering::NotAtomic && !isboxed && rhs.typ == jltype);
-            emit_unbox_store(ctx, rhs, ptr, tbaa, alignment);
+            emit_unbox_store(ctx, rhs, ptr, tbaa, Align(alignment));
         }
     }
     else if (isswapfield) {
@@ -2223,7 +2497,14 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                     Current->addIncoming(instr, SkipBB);
                     ctx.builder.SetInsertPoint(BB);
                 }
-                Compare = emit_unbox(ctx, realelty, cmp, jltype);
+                cmp = update_julia_type(ctx, cmp, jltype);
+                if (intcast) {
+                    emit_unbox_store(ctx, cmp, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
+                    Compare = ctx.builder.CreateLoad(realelty, intcast);
+                }
+                else {
+                    Compare = emit_unbox(ctx, realelty, cmp, jltype);
+                }
                 if (realelty != elty)
                     Compare = ctx.builder.CreateZExt(Compare, elty);
             }
@@ -2270,16 +2551,17 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             if (realelty != elty)
                 realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
             if (intcast) {
+                assert(!isboxed);
                 ctx.builder.CreateStore(realCompare, intcast);
-                if (maybe_null_if_boxed)
-                    realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+                if (tracked_pointers)
+                    realCompare = ctx.builder.CreateLoad(intcast_eltyp, intcast);
             }
-            if (maybe_null_if_boxed) {
-                Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
-                if (first_ptr)
-                    null_load_check(ctx, first_ptr, mod, var);
+            if (maybe_null_if_boxed && tracked_pointers) {
+                Value *first_ptr = isboxed ? realCompare : extract_first_ptr(ctx, realCompare);
+                assert(first_ptr);
+                null_load_check(ctx, first_ptr, mod, var);
             }
-            if (intcast)
+            if (intcast && !tracked_pointers)
                 oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
             else
                 oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
@@ -2287,11 +2569,17 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             if (isboxed) {
                 r = boxed(ctx, rhs);
             }
-            else if (Order != AtomicOrdering::NotAtomic || CountTrackedPointers(realelty).count) {
+            else if (intcast) {
+                emit_unbox_store(ctx, rhs, intcast, ctx.tbaa().tbaa_stack, intcast->getAlign());
+                r = ctx.builder.CreateLoad(realelty, intcast);
+                if (!tracked_pointers) // oldval is a slot, so put the oldval back
+                    ctx.builder.CreateStore(realCompare, intcast);
+            }
+            else if (Order != AtomicOrdering::NotAtomic || (tracked_pointers && rhs.inline_roots.empty())) {
                 r = emit_unbox(ctx, realelty, rhs, jltype);
-                if (realelty != elty)
-                    r = ctx.builder.CreateZExt(r, elty);
             }
+            if (realelty != elty)
+                r = ctx.builder.CreateZExt(r, elty);
             if (needlock)
                 emit_lockstate_value(ctx, needlock, true);
             cmp = oldval;
@@ -2329,7 +2617,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             }
             else {
                 assert(!isboxed && rhs.typ == jltype);
-                emit_unbox_store(ctx, rhs, ptr, tbaa, alignment);
+                emit_unbox_store(ctx, rhs, ptr, tbaa, Align(alignment));
             }
             ctx.builder.CreateBr(DoneBB);
             instr = load;
@@ -2357,9 +2645,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
                     realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
                 if (intcast) {
                     ctx.builder.CreateStore(realinstr, intcast);
+                    // n.b. this oldval is only used for emit_f_is in this branch, so we know a priori that it does not need a gc-root
                     oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
                     if (maybe_null_if_boxed)
-                        realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+                        realinstr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
                 }
                 else {
                     oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
@@ -2399,20 +2688,30 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
         ctx.builder.SetInsertPoint(DoneBB);
     if (needlock)
         emit_lockstate_value(ctx, needlock, false);
-    if (parent != NULL) {
+    if (parent != NULL && tracked_pointers && (!isboxed || !type_is_permalloc(rhs.typ))) {
         if (isreplacefield || issetfieldonce) {
-            // TODO: avoid this branch if we aren't making a write barrier
             BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f);
             DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f);
             ctx.builder.CreateCondBr(Success, BB, DoneBB);
             ctx.builder.SetInsertPoint(BB);
         }
         if (r) {
+            if (realelty != elty)
+                r = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, r, realelty));
+            if (intcast) {
+                ctx.builder.CreateStore(r, intcast);
+                r = ctx.builder.CreateLoad(intcast_eltyp, intcast);
+            }
             if (!isboxed)
                 emit_write_multibarrier(ctx, parent, r, rhs.typ);
-            else if (!type_is_permalloc(rhs.typ))
+            else
                 emit_write_barrier(ctx, parent, r);
         }
+        else {
+            assert(!isboxed);
+            assert(!rhs.inline_roots.empty());
+            emit_write_multibarrier(ctx, parent, rhs);
+        }
         if (isreplacefield || issetfieldonce) {
             ctx.builder.CreateBr(DoneBB);
             ctx.builder.SetInsertPoint(DoneBB);
@@ -2431,21 +2730,18 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx,
             instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
         if (intcast) {
             ctx.builder.CreateStore(instr, intcast);
-            instr = nullptr;
+            if (tracked_pointers)
+                instr = ctx.builder.CreateLoad(intcast_eltyp, intcast);
         }
-        if (maybe_null_if_boxed) {
-            if (intcast)
-                instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+        if (maybe_null_if_boxed && tracked_pointers) {
             Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
-            if (first_ptr)
-                null_load_check(ctx, first_ptr, mod, var);
-            if (intcast && !first_ptr)
-                instr = nullptr;
+            assert(first_ptr);
+            null_load_check(ctx, first_ptr, mod, var);
         }
-        if (instr)
-            oldval = mark_julia_type(ctx, instr, isboxed, jltype);
-        else
+        if (intcast && !tracked_pointers)
             oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa().tbaa_stack);
+        else
+            oldval = mark_julia_type(ctx, instr, isboxed, jltype);
         if (isreplacefield) {
             Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
             const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
@@ -2530,7 +2826,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     }
     assert(!jl_is_vecelement_type((jl_value_t*)stt));
 
-    if (!strct.ispointer()) { // unboxed
+    if (strct.inline_roots.empty() && !strct.ispointer()) { // unboxed
         assert(jl_is_concrete_immutable((jl_value_t*)stt));
         bool isboxed = is_datatype_all_pointers(stt);
         jl_svec_t *types = stt->types;
@@ -2586,7 +2882,8 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     }
 
     bool maybeatomic = stt->name->atomicfields != NULL;
-    if (strct.ispointer() && !maybeatomic) { // boxed or stack
+    if ((strct.inline_roots.empty() && strct.ispointer()) && !maybeatomic) { // boxed or stack
+        // COMBAK: inline_roots support could be implemented for this
         if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
             emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
             *ret = jl_cgval_t(); // unreachable
@@ -2662,13 +2959,10 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex,
     Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0);
     if (fsz > 0 && mutabl) {
         // move value to an immutable stack slot (excluding tindex)
-        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al);
-        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        AllocaInst *lv = emit_static_alloca(ctx, fsz, Align(al));
         setName(ctx.emission_context, lv, "immutable_union");
-        if (al > 1)
-            lv->setAlignment(Align(al));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
-        emit_memcpy(ctx, lv, ai, addr, ai, fsz, al, al);
+        emit_memcpy(ctx, lv, ai, addr, ai, fsz, Align(al), Align(al));
         addr = lv;
     }
     return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, tbaa);
@@ -2833,39 +3127,51 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
     }
     bool maybe_null = field_may_be_null(strct, jt, idx);
     size_t byte_offset = jl_field_offset(jt, idx);
-    if (strct.ispointer()) {
+    if (!strct.inline_roots.empty()) {
+        assert(!isatomic && !needlock);
         auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset);
-        Value *staddr = data_pointer(ctx, strct);
-        bool isboxed;
-        Type *lt = julia_type_to_llvm(ctx, (jl_value_t*)jt, &isboxed);
-        Value *addr;
-        if (isboxed) {
-            // byte_offset == 0 is an important special case here, e.g.
-            // for single field wrapper types. Introducing the bitcast
-            // can pessimize mem2reg
-            if (byte_offset > 0) {
-                addr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()),
-                        staddr,
-                        ConstantInt::get(ctx.types().T_size, byte_offset));
-            }
-            else {
-                addr = staddr;
-            }
+        auto offsets = split_value_field(jt, idx);
+        bool hasptr = offsets.second >= 0;
+        assert(hasptr == jl_field_isptr(jt, idx) || jl_type_hasptr(jfty));
+        ArrayRef<Value*> roots;
+        if (hasptr) {
+            roots = ArrayRef(strct.inline_roots).slice(offsets.second, jl_field_isptr(jt, idx) ? 1 : ((jl_datatype_t*)jfty)->layout->npointers);
+            if (maybe_null)
+                null_pointer_check(ctx, roots[0], nullcheck);
+        }
+        if (jl_field_isptr(jt, idx)) {
+            return mark_julia_type(ctx, roots[0], true, jfty);
+        }
+        Value *addr = offsets.first < 0 ? nullptr : offsets.first == 0 ? strct.V : emit_ptrgep(ctx, strct.V, offsets.first);
+        if (jl_is_uniontype(jfty)) {
+            size_t fsz = 0, al = 0;
+            int union_max = jl_islayout_inline(jfty, &fsz, &al);
+            size_t fsz1 = jl_field_size(jt, idx) - 1;
+            bool isptr = (union_max == 0);
+            assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr;
+            Value *ptindex = emit_ptrgep(ctx, addr, fsz1);
+            return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, false, union_max, strct.tbaa);
+        }
+        else if (jfty == (jl_value_t*)jl_bool_type) {
+            unsigned align = jl_field_align(jt, idx);
+            return typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false,
+                    AtomicOrdering::NotAtomic, maybe_null, align, nullcheck);
         }
         else {
-            if (jl_is_vecelement_type((jl_value_t*)jt))
-                addr = staddr; // VecElement types are unwrapped in LLVM.
-            else if (isa<StructType>(lt))
-                addr = emit_struct_gep(ctx, lt, staddr, byte_offset);
-            else
-                addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx);
-            if (addr != staddr) {
-                setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr"));
-            }
+            return mark_julia_slot(addr, jfty, nullptr, tbaa, roots);
         }
-        if (jl_field_isptr(jt, idx)) {
+    }
+    else if (strct.ispointer()) {
+        auto tbaa = best_field_tbaa(ctx, strct, jt, idx, byte_offset);
+        Value *staddr = data_pointer(ctx, strct);
+        Value *addr;
+        if (jl_is_vecelement_type((jl_value_t*)jt) || byte_offset == 0)
+            addr = staddr; // VecElement types are unwrapped in LLVM.
+        else
+            addr = emit_ptrgep(ctx, staddr, byte_offset);
+        if (addr != staddr)
             setNameWithField(ctx.emission_context, addr, get_objname, jt, idx, Twine("_ptr"));
+        if (jl_field_isptr(jt, idx)) {
             LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*)));
             setNameWithField(ctx.emission_context, Load, get_objname, jt, idx, Twine());
             Load->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
@@ -2882,14 +3188,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             bool isptr = (union_max == 0);
             assert(!isptr && fsz < jl_field_size(jt, idx)); (void)isptr;
             size_t fsz1 = jl_field_size(jt, idx) - 1;
-            Value *ptindex;
-            if (isboxed) {
-                ptindex = ctx.builder.CreateConstInBoundsGEP1_32(
-                    getInt8Ty(ctx.builder.getContext()), staddr, byte_offset + fsz1);
-            }
-            else {
-                ptindex = emit_struct_gep(ctx, cast<StructType>(lt), staddr, byte_offset + fsz1);
-            }
+            Value *ptindex = emit_ptrgep(ctx, staddr, byte_offset + fsz1);
             auto val = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, strct.tbaa);
             if (val.V && val.V != addr) {
                 setNameWithField(ctx.emission_context, val.V, get_objname, jt, idx, Twine());
@@ -2938,22 +3237,21 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 unsigned st_idx = convert_struct_offset(ctx, T, byte_offset);
                 IntegerType *ET = cast<IntegerType>(T->getStructElementType(st_idx));
                 unsigned align = (ET->getBitWidth() + 7) / 8;
-                lv = emit_static_alloca(ctx, ET);
-                lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align));
+                lv = emit_static_alloca(ctx, fsz, Align(align));
                 // emit all of the align-sized words
                 unsigned i = 0;
                 for (; i < fsz / align; i++) {
                     unsigned fld = st_idx + i;
                     Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(fld));
-                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                    Value *fldp = emit_ptrgep(ctx, lv, i * align);
                     ctx.builder.CreateAlignedStore(fldv, fldp, Align(align));
                 }
                 // emit remaining bytes up to tindex
                 if (i < ptindex - st_idx) {
-                    Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                    Value *staddr = emit_ptrgep(ctx, lv, i * align);
                     for (; i < ptindex - st_idx; i++) {
                         Value *fldv = ctx.builder.CreateExtractValue(obj, ArrayRef<unsigned>(st_idx + i));
-                        Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
+                        Value *fldp = emit_ptrgep(ctx, staddr, i);
                         ctx.builder.CreateAlignedStore(fldv, fldp, Align(1));
                     }
                 }
@@ -3014,12 +3312,15 @@ static Value *emit_genericmemoryelsize(jl_codectx_t &ctx, Value *v, jl_value_t *
         size_t sz = sty->layout->size;
         if (sty->layout->flags.arrayelem_isunion)
             sz++;
-        return ConstantInt::get(ctx.types().T_size, sz);
+        auto elsize = ConstantInt::get(ctx.types().T_size, sz);
+        return elsize;
     }
     else {
         Value *t = emit_typeof(ctx, v, false, false, true);
         Value *elsize = emit_datatype_size(ctx, t, add_isunion);
-        return ctx.builder.CreateZExt(elsize, ctx.types().T_size);
+        elsize = ctx.builder.CreateZExt(elsize, ctx.types().T_size);
+        setName(ctx.emission_context, elsize, "elsize");
+        return elsize;
     }
 }
 
@@ -3054,6 +3355,7 @@ static Value *emit_genericmemorylen(jl_codectx_t &ctx, Value *addr, jl_value_t *
     MDBuilder MDB(ctx.builder.getContext());
     auto rng = MDB.createRange(Constant::getNullValue(ctx.types().T_size), ConstantInt::get(ctx.types().T_size, genericmemoryype_maxsize(typ)));
     LI->setMetadata(LLVMContext::MD_range, rng);
+    setName(ctx.emission_context, LI, "memory_len");
     return LI;
 }
 
@@ -3063,7 +3365,7 @@ static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_data
     Value *addr = mem;
     addr = decay_derived(ctx, addr);
     addr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, addr, 1);
-    setName(ctx.emission_context, addr, ".data_ptr");
+    setName(ctx.emission_context, addr, "memory_data_ptr");
     PointerType *PPT = cast<PointerType>(ctx.types().T_jlgenericmemory->getElementType(1));
     LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*)));
     LI->setOrdering(AtomicOrdering::NotAtomic);
@@ -3075,6 +3377,7 @@ static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_data
         assert(AS == AddressSpace::Loaded);
         ptr = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, ptr });
     }
+    setName(ctx.emission_context, ptr, "memory_data");
     return ptr;
 }
 
@@ -3088,7 +3391,7 @@ static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t)
     LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None));
     jl_aliasinfo_t aliasinfo_mem = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryown);
     aliasinfo_mem.decorateInst(LI);
-    addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue,  m, JL_SMALL_BYTE_ALIGNMENT / sizeof(void*));
+    addr = emit_ptrgep(ctx, m, JL_SMALL_BYTE_ALIGNMENT);
     Value *foreign = ctx.builder.CreateICmpNE(addr, decay_derived(ctx, LI));
     return emit_guarded_test(ctx, foreign, t, [&] {
             addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1);
@@ -3104,23 +3407,19 @@ static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t)
 static Value *emit_allocobj(jl_codectx_t &ctx, jl_datatype_t *jt, bool fully_initialized);
 
 static void init_bits_value(jl_codectx_t &ctx, Value *newv, Value *v, MDNode *tbaa,
-                            unsigned alignment = sizeof(void*)) // min alignment in julia's gc is pointer-aligned
+                            Align alignment = Align(sizeof(void*))) // min alignment in julia's gc is pointer-aligned
 {
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
     // newv should already be tagged
-    ai.decorateInst(ctx.builder.CreateAlignedStore(v, newv, Align(alignment)));
+    ai.decorateInst(ctx.builder.CreateAlignedStore(v, newv, alignment));
 }
 
-static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v, MDNode *tbaa)
+static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t &v)
 {
-    // newv should already be tagged
-    if (v.ispointer()) {
-        unsigned align = std::max(julia_alignment(v.typ), (unsigned)sizeof(void*));
-        emit_memcpy(ctx, newv, jl_aliasinfo_t::fromTBAA(ctx, tbaa), v, jl_datatype_size(v.typ), align, julia_alignment(v.typ));
-    }
-    else {
-        init_bits_value(ctx, newv, v.V, tbaa);
-    }
+    MDNode *tbaa = jl_is_mutable(v.typ) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
+    Align newv_align{std::max(julia_alignment(v.typ), (unsigned)sizeof(void*))};
+    newv = maybe_decay_tracked(ctx, newv);
+    emit_unbox_store(ctx, v, newv, tbaa, newv_align);
 }
 
 static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant *constant, jl_value_t *jt)
@@ -3237,7 +3536,7 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t
     if (t == getInt1Ty(ctx.builder.getContext()))
         return track_pjlvalue(ctx, julia_bool(ctx, as_value(ctx, t, vinfo)));
 
-    if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel
+    if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && vinfo.inline_roots.empty() && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel
         if (Constant *c = dyn_cast<Constant>(vinfo.V)) {
             jl_value_t *s = static_constant_instance(jl_Module->getDataLayout(), c, jt);
             if (s) {
@@ -3344,6 +3643,8 @@ static void union_alloca_type(jl_uniontype_t *ut,
             },
             (jl_value_t*)ut,
             counter);
+    if (align > JL_HEAP_ALIGNMENT)
+        align = JL_HEAP_ALIGNMENT;
 }
 
 static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, bool &allunbox, size_t &min_align, size_t &nbytes)
@@ -3352,12 +3653,9 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut,
     union_alloca_type(ut, allunbox, nbytes, align, min_align);
     if (nbytes > 0) {
         // at least some of the values can live on the stack
-        // try to pick an Integer type size such that SROA will emit reasonable code
-        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align);
-        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        assert(align % min_align == 0);
+        AllocaInst *lv = emit_static_alloca(ctx, nbytes, Align(align));
         setName(ctx.emission_context, lv, "unionalloca");
-        if (align > 1)
-            lv->setAlignment(Align(align));
         return lv;
     }
     return NULL;
@@ -3413,7 +3711,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
                     if (!box) {
                         box = emit_allocobj(ctx, jt, true);
                         setName(ctx.emission_context, box, "unionbox");
-                        init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
+                        init_bits_cgval(ctx, box, vinfo_r);
                     }
                 }
                 tempBB = ctx.builder.GetInsertBlock(); // could have changed
@@ -3536,14 +3834,14 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
         box = box_union(ctx, vinfo, skip_none);
     }
     else {
-        assert(vinfo.V && "Missing data for unboxed value.");
+        assert((vinfo.V || !vinfo.inline_roots.empty()) && "Missing data for unboxed value.");
         assert(jl_is_concrete_immutable(jt) && "This type shouldn't have been unboxed.");
         Type *t = julia_type_to_llvm(ctx, jt);
         assert(!type_is_ghost(t)); // ghost values should have been handled by vinfo.constant above!
         box = _boxed_special(ctx, vinfo, t);
         if (!box) {
             bool do_promote = vinfo.promotion_point;
-            if (do_promote && is_promotable) {
+            if (do_promote && is_promotable && vinfo.inline_roots.empty()) {
                 auto IP = ctx.builder.saveIP();
                 ctx.builder.SetInsertPoint(vinfo.promotion_point);
                 box = emit_allocobj(ctx, (jl_datatype_t*)jt, true);
@@ -3557,13 +3855,14 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo, bool is_promotab
                 // end illegal IR
                 originalAlloca->eraseFromParent();
                 ctx.builder.restoreIP(IP);
-            } else {
+            }
+            else {
                 auto arg_typename = [&] JL_NOTSAFEPOINT {
                     return "box::" + std::string(jl_symbol_name(((jl_datatype_t*)(jt))->name->name));
                 };
                 box = emit_allocobj(ctx, (jl_datatype_t*)jt, true);
                 setName(ctx.emission_context, box, arg_typename);
-                init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
+                init_bits_cgval(ctx, box, vinfo);
             }
         }
     }
@@ -3576,30 +3875,25 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
     if (AllocaInst *ai = dyn_cast<AllocaInst>(dest))
         // TODO: make this a lifetime_end & dereferenceable annotation?
         ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign());
-    if (jl_is_concrete_type(src.typ) || src.constant) {
-        jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ;
+    if (src.constant) {
+        jl_value_t *typ = jl_typeof(src.constant);
         assert(skip || jl_is_pointerfree(typ));
         if (jl_is_pointerfree(typ)) {
-            unsigned alignment = julia_alignment(typ);
-            if (!src.ispointer() || src.constant) {
-                emit_unbox_store(ctx, src, dest, tbaa_dst, alignment, isVolatile);
-            }
-            else {
-                Value *src_ptr = data_pointer(ctx, src);
-                unsigned nb = jl_datatype_size(typ);
-                // TODO: this branch may be bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use:
-                //   select copy dest -> dest to simulate an undef value / conditional copy
-                // if (skip) src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr);
-                auto f = [&] {
-                    (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
-                                      jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
-                    return nullptr;
-                };
-                if (skip)
-                    emit_guarded_test(ctx, skip, nullptr, f);
-                else
-                    f();
-            }
+            emit_guarded_test(ctx, skip, nullptr, [&] {
+                unsigned alignment = julia_alignment(typ);
+                emit_unbox_store(ctx, mark_julia_const(ctx, src.constant), dest, tbaa_dst, Align(alignment), isVolatile);
+                return nullptr;
+            });
+        }
+    }
+    else if (jl_is_concrete_type(src.typ)) {
+        assert(skip || jl_is_pointerfree(src.typ));
+        if (jl_is_pointerfree(src.typ)) {
+            emit_guarded_test(ctx, skip, nullptr, [&] {
+                unsigned alignment = julia_alignment(src.typ);
+                emit_unbox_store(ctx, src, dest, tbaa_dst, Align(alignment), isVolatile);
+                return nullptr;
+            });
         }
     }
     else if (src.TIndex) {
@@ -3627,7 +3921,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
                             return;
                         } else {
                             emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), src_ptr,
-                                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, alignment, alignment, isVolatile);
+                                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), nb, Align(alignment), Align(alignment), isVolatile);
                         }
                     }
                     ctx.builder.CreateBr(postBB);
@@ -3649,17 +3943,13 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
     }
     else {
         assert(src.isboxed && "expected boxed value for sizeof/alignment computation");
-        auto f = [&] {
+        emit_guarded_test(ctx, skip, nullptr, [&] {
             Value *datatype = emit_typeof(ctx, src, false, false);
             Value *copy_bytes = emit_datatype_size(ctx, datatype);
-            (void)emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
-                              jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, 1, 1, isVolatile);
+            emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dst), data_pointer(ctx, src),
+                        jl_aliasinfo_t::fromTBAA(ctx, src.tbaa), copy_bytes, Align(1), Align(1), isVolatile);
             return nullptr;
-        };
-        if (skip)
-            emit_guarded_test(ctx, skip, nullptr, f);
-        else
-            f();
+        });
     }
 }
 
@@ -3748,6 +4038,12 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg
     emit_write_barrier(ctx, parent, ptrs);
 }
 
+static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, const jl_cgval_t &x)
+{
+    auto ptrs = get_gc_roots_for(ctx, x, true);
+    emit_write_barrier(ctx, parent, ptrs);
+}
+
 static jl_cgval_t union_store(jl_codectx_t &ctx,
         Value *ptr, Value *ptindex, jl_cgval_t rhs, jl_cgval_t cmp,
         jl_value_t *jltype, MDNode *tbaa, MDNode *tbaa_tindex,
@@ -3787,6 +4083,12 @@ static jl_cgval_t union_store(jl_codectx_t &ctx,
                 rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
             }
             else {
+                if (trim_may_error(ctx.params->trim)) {
+                    // if we know the return type, we can assume the result is of that type
+                    errs() << "ERROR: Dynamic call to setfield/modifyfield\n";
+                    errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+                    print_stacktrace(ctx, ctx.params->trim);
+                }
                 Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, julia_call);
                 rhs = mark_julia_type(ctx, callval, true, jl_any_type);
             }
@@ -3850,19 +4152,14 @@ static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
     auto tbaa = best_field_tbaa(ctx, strct, sty, idx0, byte_offset);
     Value *addr = data_pointer(ctx, strct);
     if (byte_offset > 0) {
-        addr = ctx.builder.CreateInBoundsGEP(
-                getInt8Ty(ctx.builder.getContext()),
-                addr,
-                ConstantInt::get(ctx.types().T_size, byte_offset));
+        addr = emit_ptrgep(ctx, addr, byte_offset);
         setNameWithField(ctx.emission_context, addr, get_objname, sty, idx0, Twine("_ptr"));
     }
     jl_value_t *jfty = jl_field_type(sty, idx0);
     bool isboxed = jl_field_isptr(sty, idx0);
     if (!isboxed && jl_is_uniontype(jfty)) {
         size_t fsz1 = jl_field_size(sty, idx0) - 1;
-        Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()),
-                addr,
-                ConstantInt::get(ctx.types().T_size, fsz1));
+        Value *ptindex = emit_ptrgep(ctx, addr, fsz1);
         setNameWithField(ctx.emission_context, ptindex, get_objname, sty, idx0, Twine(".tindex_ptr"));
         return union_store(ctx, addr, ptindex, rhs, cmp, jfty, tbaa, ctx.tbaa().tbaa_unionselbyte,
             Order, FailOrder,
@@ -3893,25 +4190,24 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             Type *lt = julia_type_to_llvm(ctx, ty);
             unsigned na = nargs < nf ? nargs : nf;
 
-            // whether we should perform the initialization with the struct as a IR value
-            // or instead initialize the stack buffer with stores
-            auto tracked = CountTrackedPointers(lt);
+            // choose whether we should perform the initialization with the struct as a IR value
+            // or instead initialize the stack buffer with stores (the later is nearly always better)
+            auto tracked = split_value_size(sty);
+            assert(CountTrackedPointers(lt).count == tracked.second);
             bool init_as_value = false;
             if (lt->isVectorTy() || jl_is_vecelement_type(ty)) { // maybe also check the size ?
                 init_as_value = true;
             }
-            else if (tracked.count) {
-                init_as_value = true;
-            }
 
             Instruction *promotion_point = nullptr;
             ssize_t promotion_ssa = -1;
             Value *strct;
+            SmallVector<Value*,0> inline_roots;
             if (type_is_ghost(lt)) {
-                strct = NULL;
+                strct = nullptr;
             }
             else if (init_as_value) {
-                if (tracked.count) {
+                if (tracked.second) {
                     strct = Constant::getNullValue(lt);
                 }
                 else {
@@ -3920,13 +4216,19 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                         strct = ctx.builder.CreateFreeze(strct);
                 }
             }
+            else if (tracked.second) {
+                inline_roots.resize(tracked.second, Constant::getNullValue(ctx.types().T_prjlvalue));
+                strct = nullptr;
+                if (tracked.first) {
+                    AllocaInst *bits = emit_static_alloca(ctx, tracked.first, Align(julia_alignment(ty)));
+                    strct = bits;
+                    setName(ctx.emission_context, bits, arg_typename);
+                    is_promotable = false; // wrong layout for promotion
+                }
+            }
             else {
-                strct = emit_static_alloca(ctx, lt);
+                strct = emit_static_alloca(ctx, lt, Align(julia_alignment(ty)));
                 setName(ctx.emission_context, strct, arg_typename);
-                if (nargs < nf)
-                    promotion_point = ctx.builder.CreateStore(ctx.builder.CreateFreeze(UndefValue::get(lt)), strct);
-                if (tracked.count)
-                    undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack);
             }
 
             for (unsigned i = 0; i < na; i++) {
@@ -3938,26 +4240,33 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 fval_info = update_julia_type(ctx, fval_info, jtype);
                 if (fval_info.typ == jl_bottom_type)
                     return jl_cgval_t();
+                if (type_is_ghost(lt))
+                    continue;
+                Type *fty = julia_type_to_llvm(ctx, jtype);
+                if (type_is_ghost(fty))
+                    continue;
+                Instruction *dest = nullptr;
+                MutableArrayRef<Value*> roots;
+                ssize_t offs = jl_field_offset(sty, i);
+                ssize_t ptrsoffs = -1;
+                if (!inline_roots.empty())
+                    std::tie(offs, ptrsoffs) = split_value_field(sty, i);
+                unsigned llvm_idx = init_as_value ? ((i > 0 && isa<StructType>(lt)) ? convert_struct_offset(ctx, lt, offs) : i) : -1u;
                 // TODO: Use (post-)domination instead.
                 bool field_promotable = !jl_is_uniontype(jtype) && !init_as_value && fval_info.promotion_ssa != -1 &&
+                    fval_info.inline_roots.empty() && inline_roots.empty() && // these need to be compatible, if they were to be implemented
                     fval_info.promotion_point && fval_info.promotion_point->getParent() == ctx.builder.GetInsertBlock();
                 if (field_promotable) {
                     savedIP = ctx.builder.saveIP();
                     ctx.builder.SetInsertPoint(fval_info.promotion_point);
                 }
-                if (type_is_ghost(lt))
-                    continue;
-                Type *fty = julia_type_to_llvm(ctx, jtype);
-                if (type_is_ghost(fty))
-                    continue;
-                Value *dest = NULL;
-                unsigned offs = jl_field_offset(sty, i);
-                unsigned llvm_idx = (i > 0 && isa<StructType>(lt)) ? convert_struct_offset(ctx, lt, offs) : i;
                 if (!init_as_value) {
                     // avoid unboxing the argument explicitly
                     // and use memcpy instead
-                    Instruction *inst;
-                    dest = inst = cast<Instruction>(ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx));
+                    Instruction *inst = strct && offs >= 0 ? cast<Instruction>(emit_ptrgep(ctx, strct, offs)) : nullptr;
+                    if (!inline_roots.empty() && ptrsoffs >= 0)
+                        roots = MutableArrayRef(inline_roots).slice(ptrsoffs, jl_field_isptr(sty, i) ? 1 : ((jl_datatype_t*)jtype)->layout->npointers);
+                    dest = inst;
                     // Our promotion point needs to come before
                     //  A) All of our arguments' promotion points
                     //  B) Any instructions we insert at any of our arguments' promotion points
@@ -3977,10 +4286,13 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 if (jl_field_isptr(sty, i)) {
                     fval = boxed(ctx, fval_info, field_promotable);
                     if (!init_as_value) {
-                        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
-                        StoreInst *SI = cast<StoreInst>(ai.decorateInst(
-                                ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i)))));
-                        SI->setOrdering(AtomicOrdering::Unordered);
+                        if (dest) {
+                            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+                            ai.decorateInst(ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i))));
+                        }
+                        else {
+                            roots[0] = fval;
+                        }
                     }
                 }
                 else if (jl_is_uniontype(jtype)) {
@@ -4003,23 +4315,22 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                         if (fsz1 > 0 && !fval_info.isghost) {
                             Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al);
                             assert(lt->getStructElementType(llvm_idx) == ET);
-                            AllocaInst *lv = emit_static_alloca(ctx, ET);
+                            AllocaInst *lv = emit_static_alloca(ctx, fsz1, Align(al));
                             setName(ctx.emission_context, lv, "unioninit");
-                            lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz1 + al - 1) / al));
                             emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr);
                             // emit all of the align-sized words
                             unsigned i = 0;
                             for (; i < fsz1 / al; i++) {
-                                Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                                Value *fldp = emit_ptrgep(ctx, lv, i * al);
                                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
                                 Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(ET, fldp, Align(al)));
                                 strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef<unsigned>(llvm_idx + i));
                             }
                             // emit remaining bytes up to tindex
                             if (i < ptindex - llvm_idx) {
-                                Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
+                                Value *staddr = emit_ptrgep(ctx, lv, i * al);
                                 for (; i < ptindex - llvm_idx; i++) {
-                                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
+                                    Value *fldp = emit_ptrgep(ctx, staddr, i);
                                     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
                                     Value *fldv = ai.decorateInst(ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1)));
                                     strct = ctx.builder.CreateInsertValue(strct, fldv, ArrayRef<unsigned>(llvm_idx + i));
@@ -4032,7 +4343,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                             fval = ctx.builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(llvm_idx));
                     }
                     else {
-                        Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz1);
+                        Value *ptindex = emit_ptrgep(ctx, strct, offs + fsz1);
                         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
                         ai.decorateInst(ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
                         if (!rhs_union.isghost)
@@ -4043,10 +4354,15 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     if (field_promotable) {
                         fval_info.V->replaceAllUsesWith(dest);
                         cast<Instruction>(fval_info.V)->eraseFromParent();
-                    } else if (init_as_value) {
+                    }
+                    else if (init_as_value) {
                         fval = emit_unbox(ctx, fty, fval_info, jtype);
-                    } else {
-                        emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, jl_field_align(sty, i));
+                    }
+                    else if (!roots.empty()) {
+                        split_value_into(ctx, fval_info, Align(julia_alignment(jtype)), dest, Align(jl_field_align(sty, i)), jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots);
+                    }
+                    else {
+                        emit_unbox_store(ctx, fval_info, dest, ctx.tbaa().tbaa_stack, Align(jl_field_align(sty, i)));
                     }
                 }
                 if (init_as_value) {
@@ -4066,26 +4382,43 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             }
             for (size_t i = nargs; i < nf; i++) {
                 if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
-                    unsigned offs = jl_field_offset(sty, i);
+                    ssize_t offs = jl_field_offset(sty, i);
+                    ssize_t ptrsoffs = -1;
+                    if (!inline_roots.empty())
+                        std::tie(offs, ptrsoffs) = split_value_field(sty, i);
+                    assert(ptrsoffs < 0 && offs >= 0);
                     int fsz = jl_field_size(sty, i) - 1;
-                    unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
-                    if (init_as_value)
+                    if (init_as_value) {
+                        unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
                         strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef<unsigned>(llvm_idx));
+                    }
                     else {
                         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
-                        ai.decorateInst(ctx.builder.CreateAlignedStore(
-                                ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
-                                ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx),
-                                Align(1)));
+                        Instruction *dest = cast<Instruction>(emit_ptrgep(ctx, strct, offs + fsz));
+                        if (promotion_point == nullptr)
+                            promotion_point = dest;
+                        ai.decorateInst(ctx.builder.CreateAlignedStore(ctx.builder.getInt8(0), dest, Align(1)));
                     }
                 }
             }
+            if (nargs < nf) {
+                assert(!init_as_value);
+                IRBuilderBase::InsertPoint savedIP = ctx.builder.saveIP();
+                if (promotion_point)
+                    ctx.builder.SetInsertPoint(promotion_point);
+                if (strct) {
+                    promotion_point = cast<FreezeInst>(ctx.builder.CreateFreeze(UndefValue::get(lt)));
+                    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+                    ai.decorateInst(ctx.builder.CreateStore(promotion_point, strct));
+                }
+                ctx.builder.restoreIP(savedIP);
+            }
             if (type_is_ghost(lt))
                 return mark_julia_const(ctx, sty->instance);
             else if (init_as_value)
                 return mark_julia_type(ctx, strct, false, ty);
             else {
-                jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack);
+                jl_cgval_t ret = mark_julia_slot(strct, ty, NULL, ctx.tbaa().tbaa_stack, inline_roots);
                 if (is_promotable && promotion_point) {
                     ret.promotion_point = promotion_point;
                     ret.promotion_ssa = promotion_ssa;
@@ -4103,8 +4436,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
                 ai.decorateInst(ctx.builder.CreateAlignedStore(
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
-                        ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), strct,
-                                ConstantInt::get(ctx.types().T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)),
+                        emit_ptrgep(ctx, strct, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1),
                         Align(1)));
             }
         }
@@ -4146,9 +4478,7 @@ static Value *emit_defer_signal(jl_codectx_t &ctx)
 {
     ++EmittedDeferSignal;
     Value *ptls = get_current_ptls(ctx);
-    Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()),
-            offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t));
-    return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef<Value*>(offset), "jl_defer_signal");
+    return emit_ptrgep(ctx, ptls, offsetof(jl_tls_states_t, defer_signal));
 }
 
 #ifndef JL_NDEBUG
@@ -4177,6 +4507,7 @@ static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, co
     Value *ref = Constant::getNullValue(get_memoryref_type(ctx.builder.getContext(), ctx.types().T_size, layout, 0));
     ref = ctx.builder.CreateInsertValue(ref, data, 0);
     ref = ctx.builder.CreateInsertValue(ref, mem, 1);
+    setName(ctx.emission_context, ref, "memory_ref");
     return mark_julia_type(ctx, ref, false, typ);
 }
 
@@ -4191,12 +4522,26 @@ static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &mem, cons
 
 static Value *emit_memoryref_FCA(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
 {
-    if (ref.ispointer()) {
+    if (!ref.inline_roots.empty()) {
+        LLVMContext &C = ctx.builder.getContext();
+        StructType *type = get_memoryref_type(C, ctx.types().T_size, layout, 0);
+        LoadInst *load0 = ctx.builder.CreateLoad(type->getElementType(0), ref.V);
+        jl_aliasinfo_t ai0 = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa);
+        ai0.decorateInst(load0);
+        setName(ctx.emission_context, load0, "memory_ref_FCA0");
+        Value *root = ctx.builder.CreateBitCast(ref.inline_roots[0], type->getElementType(1));
+        Value *load = Constant::getNullValue(type);
+        load = ctx.builder.CreateInsertValue(load, load0, 0);
+        load = ctx.builder.CreateInsertValue(load, root, 1);
+        return load;
+    }
+    else if (ref.ispointer()) {
         LLVMContext &C = ctx.builder.getContext();
         Type *type = get_memoryref_type(C, ctx.types().T_size, layout, 0);
         LoadInst *load = ctx.builder.CreateLoad(type, data_pointer(ctx, ref));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ref.tbaa);
         ai.decorateInst(load);
+        setName(ctx.emission_context, load, "memory_ref_FCA");
         return load;
     }
     else {
@@ -4213,9 +4558,12 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
         return jl_cgval_t();
     Value *V = emit_memoryref_FCA(ctx, ref, layout);
     Value *data = CreateSimplifiedExtractValue(ctx, V, 0);
+    maybeSetName(ctx.emission_context, data, "memoryref_data");
     Value *mem = CreateSimplifiedExtractValue(ctx, V, 1);
+    maybeSetName(ctx.emission_context, mem, "memoryref_mem");
     Value *i = emit_unbox(ctx, ctx.types().T_size, idx, (jl_value_t*)jl_long_type);
     Value *offset = ctx.builder.CreateSub(i, ConstantInt::get(ctx.types().T_size, 1));
+    setName(ctx.emission_context, offset, "memoryref_offset");
     Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
     bool bc = bounds_check_enabled(ctx, inbounds);
 #if 1
@@ -4227,12 +4575,14 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
     bool isghost = layout->size == 0;
     if ((!isboxed && isunion) || isghost) {
         newdata = ctx.builder.CreateAdd(data, offset);
+        setName(ctx.emission_context, newdata, "memoryref_data+offset");
         if (bc) {
             BasicBlock *failBB, *endBB;
             failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
             endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
             Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
             Value *inbound = ctx.builder.CreateICmpULT(newdata, mlen);
+            setName(ctx.emission_context, offset, "memoryref_isinbounds");
             ctx.builder.CreateCondBr(inbound, endBB, failBB);
             failBB->insertInto(ctx.f);
             ctx.builder.SetInsertPoint(failBB);
@@ -4260,11 +4610,13 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
             // and we can further rearrange that as ovflw = !( offset+len < len+len ) as unsigned math
             Value *mlen = emit_genericmemorylen(ctx, mem, ref.typ);
             ovflw = ctx.builder.CreateICmpUGE(ctx.builder.CreateAdd(offset, mlen), ctx.builder.CreateNUWAdd(mlen, mlen));
+            setName(ctx.emission_context, ovflw, "memoryref_ovflw");
         }
 #endif
-        //Is this change fine
         boffset = ctx.builder.CreateMul(offset, elsz);
+        setName(ctx.emission_context, boffset, "memoryref_byteoffset");
         newdata = ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()), data, boffset);
+        setName(ctx.emission_context, newdata, "memoryref_data_byteoffset");
         (void)boffset; // LLVM is very bad at handling GEP with types different from the load
         if (bc) {
             BasicBlock *failBB, *endBB;
@@ -4287,8 +4639,11 @@ static jl_cgval_t emit_memoryref(jl_codectx_t &ctx, const jl_cgval_t &ref, jl_cg
                 ctx.builder.CreatePtrToInt(newdata, ctx.types().T_size),
                 ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
             Value *blen = ctx.builder.CreateMul(mlen, elsz, "", true, true);
+            setName(ctx.emission_context, blen, "memoryref_bytelen");
             Value *inbound = ctx.builder.CreateICmpULT(bidx0, blen);
+            setName(ctx.emission_context, inbound, "memoryref_isinbounds");
             inbound = ctx.builder.CreateAnd(ctx.builder.CreateNot(ovflw), inbound);
+            setName(ctx.emission_context, inbound, "memoryref_isinbounds&notovflw");
 #else
             Value *idx0; // (newdata - mptr) / elsz
             idx0 = ctx.builder.CreateSub(
@@ -4325,8 +4680,10 @@ static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref
         offset = ctx.builder.CreateSub(
             ctx.builder.CreatePtrToInt(data, ctx.types().T_size),
             ctx.builder.CreatePtrToInt(mptr, ctx.types().T_size));
+        setName(ctx.emission_context, offset, "memoryref_offset");
         Value *elsz = emit_genericmemoryelsize(ctx, mem, ref.typ, false);
         offset = ctx.builder.CreateExactUDiv(offset, elsz);
+        setName(ctx.emission_context, offset, "memoryref_offsetidx");
     }
     offset = ctx.builder.CreateAdd(offset, ConstantInt::get(ctx.types().T_size, 1));
     return mark_julia_type(ctx, offset, false, jl_long_type);
@@ -4335,7 +4692,9 @@ static jl_cgval_t emit_memoryref_offset(jl_codectx_t &ctx, const jl_cgval_t &ref
 static Value *emit_memoryref_mem(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
 {
     Value *V = emit_memoryref_FCA(ctx, ref, layout);
-    return CreateSimplifiedExtractValue(ctx, V, 1);
+    V = CreateSimplifiedExtractValue(ctx, V, 1);
+    maybeSetName(ctx.emission_context, V, "memoryref_mem");
+    return V;
 }
 
 static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const jl_datatype_layout_t *layout)
@@ -4357,13 +4716,15 @@ static Value *emit_memoryref_ptr(jl_codectx_t &ctx, const jl_cgval_t &ref, const
     data = ctx.builder.CreateCall(prepare_call(gc_loaded_func), { mem, data });
     if (!GEPlist.empty()) {
         for (auto &GEP : make_range(GEPlist.rbegin(), GEPlist.rend())) {
-            Instruction *GEP2 = GEP->clone();
+            GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GEP->clone());
             GEP2->mutateType(PointerType::get(GEP->getResultElementType(), AS));
             GEP2->setOperand(GetElementPtrInst::getPointerOperandIndex(), data);
+            GEP2->setIsInBounds(true);
             ctx.builder.Insert(GEP2);
             data = GEP2;
         }
     }
+    setName(ctx.emission_context, data, "memoryref_data");
     return data;
 }
 
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 9caff014c7703..ecaeb460ebf91 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -767,7 +767,7 @@ bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD, const S
   SourceLocation Loc = FD->getLocation();
   StringRef Name = SM.getFilename(Loc);
   Name = llvm::sys::path::filename(Name);
-  if (Name.startswith("llvm-"))
+  if (Name.starts_with("llvm-"))
       return true;
   return false;
 }
@@ -911,9 +911,9 @@ bool GCChecker::isSafepoint(const CallEvent &Call, CheckerContext &C) const {
       if (FD->getBuiltinID() != 0 || FD->isTrivial())
         isCalleeSafepoint = false;
       else if (FD->getDeclName().isIdentifier() &&
-               (FD->getName().startswith("uv_") ||
-                FD->getName().startswith("unw_") ||
-                FD->getName().startswith("_U")) &&
+               (FD->getName().starts_with("uv_") ||
+                FD->getName().starts_with("unw_") ||
+                FD->getName().starts_with("_U")) &&
                FD->getName() != "uv_run")
         isCalleeSafepoint = false;
       else
@@ -1050,13 +1050,13 @@ bool GCChecker::processAllocationOfResult(const CallEvent &Call,
         // global roots.
         StringRef FDName =
             FD->getDeclName().isIdentifier() ? FD->getName() : "";
-        if (FDName.startswith("jl_box_") || FDName.startswith("ijl_box_")) {
+        if (FDName.starts_with("jl_box_") || FDName.starts_with("ijl_box_")) {
           SVal Arg = Call.getArgSVal(0);
           if (auto CI = Arg.getAs<nonloc::ConcreteInt>()) {
             const llvm::APSInt &Value = CI->getValue();
             bool GloballyRooted = false;
             const int64_t NBOX_C = 1024;
-            if (FDName.startswith("jl_box_u") || FDName.startswith("ijl_box_u")) {
+            if (FDName.starts_with("jl_box_u") || FDName.starts_with("ijl_box_u")) {
               if (Value < NBOX_C) {
                 GloballyRooted = true;
               }
diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c
index 3e97c149bffe3..7ddb68fd6b036 100644
--- a/src/codegen-stubs.c
+++ b/src/codegen-stubs.c
@@ -15,6 +15,7 @@ JL_DLLEXPORT void jl_dump_native_fallback(void *native_code,
         ios_t *z, ios_t *s) UNAVAILABLE
 JL_DLLEXPORT void jl_get_llvm_gvs_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE
 JL_DLLEXPORT void jl_get_llvm_external_fns_fallback(void *native_code, arraylist_t *gvs) UNAVAILABLE
+JL_DLLEXPORT void jl_get_llvm_mis_fallback(void *native_code, arraylist_t* MIs) UNAVAILABLE
 
 JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE
 JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world,
@@ -107,8 +108,6 @@ JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr)
     return 0;
 }
 
-JL_DLLEXPORT void jl_build_newpm_pipeline_fallback(void *MPM, void *PB, void *config) UNAVAILABLE
-
 JL_DLLEXPORT void jl_register_passbuilder_callbacks_fallback(void *PB) { }
 
 #define MODULE_PASS(NAME, CLASS, CREATE_PASS) \
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 875beb7c287dc..bcda527416676 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -16,6 +16,7 @@
 #include <array>
 #include <vector>
 #include <set>
+#include <unordered_set>
 #include <functional>
 
 // target machine computation
@@ -171,6 +172,14 @@ void setName(jl_codegen_params_t &params, Value *V, const Twine &Name)
     }
 }
 
+void maybeSetName(jl_codegen_params_t &params, Value *V, const Twine &Name)
+{
+    // To be used when we may get an Instruction or something that is not an instruction i.e Constants/Arguments
+    if (params.debug_level >= 2 && isa<Instruction>(V)) {
+        V->setName(Name);
+    }
+}
+
 void setName(jl_codegen_params_t &params, Value *V, std::function<std::string()> GetName)
 {
     assert((isa<Constant>(V) || isa<Instruction>(V)) && "Should only set names on instructions!");
@@ -907,14 +916,6 @@ static const auto jlcheckassignonce_func = new JuliaFunction<>{
             {T_pjlvalue, T_pjlvalue, T_pjlvalue, PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     nullptr,
 };
-static const auto jldeclareconst_func = new JuliaFunction<>{
-    XSTR(jl_declare_constant),
-    [](LLVMContext &C) {
-        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
-        return FunctionType::get(getVoidTy(C),
-            {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false); },
-    nullptr,
-};
 static const auto jldeclareconstval_func = new JuliaFunction<>{
     XSTR(jl_declare_constant_val),
     [](LLVMContext &C) {
@@ -951,6 +952,16 @@ static const auto jlgetbindingwrorerror_func = new JuliaFunction<>{
     },
     nullptr,
 };
+static const auto jlgetbindingvalue_func = new JuliaFunction<>{
+    XSTR(jl_reresolve_binding_value_seqcst),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+                {T_pjlvalue}, false);
+    },
+    nullptr,
+};
 static const auto jlboundp_func = new JuliaFunction<>{
     XSTR(jl_boundp),
     [](LLVMContext &C) {
@@ -1010,13 +1021,12 @@ static const auto jlmethod_func = new JuliaFunction<>{
     nullptr,
 };
 static const auto jlgenericfunction_func = new JuliaFunction<>{
-    XSTR(jl_generic_function_def),
+    XSTR(jl_declare_const_gf),
     [](LLVMContext &C) {
         auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
         auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
         auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
-        return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue}, false);
+        return FunctionType::get(T_prjlvalue, {T_pjlvalue, T_pjlvalue, T_pjlvalue}, false);
     },
     nullptr,
 };
@@ -1610,6 +1620,7 @@ static const auto &builtin_func_map() {
           { jl_f__call_in_world_addr,     new JuliaFunction<>{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} },
           { jl_f__call_in_world_total_addr, new JuliaFunction<>{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} },
           { jl_f_throw_addr,              new JuliaFunction<>{XSTR(jl_f_throw), get_func_sig, get_func_attrs} },
+          { jl_f_throw_methoderror_addr,  new JuliaFunction<>{XSTR(jl_f_throw_methoderror), get_func_sig, get_func_attrs} },
           { jl_f_tuple_addr,              jltuple_func },
           { jl_f_svec_addr,               new JuliaFunction<>{XSTR(jl_f_svec), get_func_sig, get_func_attrs} },
           { jl_f_applicable_addr,         new JuliaFunction<>{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} },
@@ -1635,36 +1646,29 @@ static const auto &builtin_func_map() {
           { jl_f_donotdelete_addr,        new JuliaFunction<>{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} },
           { jl_f_compilerbarrier_addr,    new JuliaFunction<>{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} },
           { jl_f_finalizer_addr,          new JuliaFunction<>{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} },
-          { jl_f__svec_ref_addr,          new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} }
+          { jl_f__svec_ref_addr,          new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} },
+          { jl_f_current_scope_addr,      new JuliaFunction<>{XSTR(jl_f_current_scope), get_func_sig, get_func_attrs} },
         };
     return builtins;
 }
 
+static const auto &may_dispatch_builtins() {
+    static std::unordered_set<jl_fptr_args_t> builtins(
+        {jl_f__apply_iterate_addr,
+        jl_f__apply_pure_addr,
+        jl_f__call_in_world_addr,
+        jl_f__call_in_world_total_addr,
+        jl_f__call_latest_addr,
+        });
+    return builtins;
+}
+
 static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction<>{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
 
 static _Atomic(uint64_t) globalUniqueGeneratedNames{1};
 
 // --- code generation ---
 
-extern "C" {
-    jl_cgparams_t jl_default_cgparams = {
-        /* track_allocations */ 1,
-        /* code_coverage */ 1,
-        /* prefer_specsig */ 0,
-#ifdef _OS_WINDOWS_
-        /* gnu_pubnames */ 0,
-#else
-        /* gnu_pubnames */ 1,
-#endif
-        /* debug_info_kind */ (int) DICompileUnit::DebugEmissionKind::FullDebug,
-        /* debug_line_info */ 1,
-        /* safepoint_on_entry */ 1,
-        /* gcstack_arg */ 1,
-        /* use_jlplt*/ 1,
-        /* lookup */ jl_rettype_inferred_addr };
-}
-
-
 static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) {
     jt = jl_unwrap_unionall(jt);
     if (jt == (jl_value_t*)jl_datatype_type ||
@@ -1805,11 +1809,12 @@ struct jl_cgval_t {
     Value *Vboxed;
 
     Value *TIndex; // if `V` is an unboxed (tagged) Union described by `typ`, this gives the DataType index (1-based, small int) as an i8
+    SmallVector<Value*,0> inline_roots; // if present, `V` is a pointer, but not in canonical layout
     jl_value_t *constant; // constant value (rooted in linfo.def.roots)
-    jl_value_t *typ; // the original type of V, never NULL
+    jl_value_t *typ; // the original type of V, never nullptr
     bool isboxed; // whether this value is a jl_value_t* allocated on the heap with the right type tag
     bool isghost; // whether this value is "ghost"
-    MDNode *tbaa; // The related tbaa node. Non-NULL iff this holds an address.
+    MDNode *tbaa; // The related tbaa node. Non-nullptr iff this holds an address.
     // If non-null, this memory location may be promoted on use, by hoisting the
     // destination memory above the promotion point.
     Instruction *promotion_point;
@@ -1820,13 +1825,15 @@ struct jl_cgval_t {
     bool ispointer() const
     {
         // whether this value is compatible with `data_pointer`
+        assert(inline_roots.empty());
         return tbaa != nullptr;
     }
     jl_cgval_t(Value *Vval, jl_value_t *typ, Value *tindex) : // general value constructor
-        V(Vval), // V is allowed to be NULL in a jl_varinfo_t context, but not during codegen contexts
+        V(Vval), // V is allowed to be nullptr in a jl_varinfo_t context, but not during codegen contexts
         Vboxed(nullptr),
         TIndex(tindex),
-        constant(NULL),
+        inline_roots(),
+        constant(nullptr),
         typ(typ),
         isboxed(false),
         isghost(false),
@@ -1834,13 +1841,15 @@ struct jl_cgval_t {
         promotion_point(nullptr),
         promotion_ssa(-1)
     {
-        assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext()));
+        assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext()));
     }
-    jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa) : // general pointer constructor
+    jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, Value* inline_roots) = delete;
+    jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef<Value*> inline_roots) : // general pointer constructor
         V(Vptr),
         Vboxed(isboxed ? Vptr : nullptr),
         TIndex(tindex),
-        constant(NULL),
+        inline_roots(inline_roots),
+        constant(nullptr),
         typ(typ),
         isboxed(isboxed),
         isghost(false),
@@ -1850,15 +1859,16 @@ struct jl_cgval_t {
     {
         if (Vboxed)
             assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext()));
-        assert(tbaa != NULL);
-        assert(!(isboxed && TIndex != NULL));
-        assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext()));
+        assert(tbaa != nullptr);
+        assert(!(isboxed && TIndex != nullptr));
+        assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext()));
     }
     explicit jl_cgval_t(jl_value_t *typ) : // ghost value constructor
-        // mark explicit to avoid being used implicitly for conversion from NULL (use jl_cgval_t() instead)
-        V(NULL),
-        Vboxed(NULL),
-        TIndex(NULL),
+        // mark explicit to avoid being used implicitly for conversion from nullptr (use jl_cgval_t() instead)
+        V(nullptr),
+        Vboxed(nullptr),
+        TIndex(nullptr),
+        inline_roots(),
         constant(((jl_datatype_t*)typ)->instance),
         typ(typ),
         isboxed(false),
@@ -1874,6 +1884,7 @@ struct jl_cgval_t {
         V(v.V),
         Vboxed(v.Vboxed),
         TIndex(tindex),
+        inline_roots(v.inline_roots),
         constant(v.constant),
         typ(typ),
         isboxed(v.isboxed),
@@ -1887,17 +1898,18 @@ struct jl_cgval_t {
         // this constructor expects we had a badly or equivalently typed version
         // make sure we aren't discarding the actual type information
         if (v.TIndex) {
-            assert((TIndex == NULL) == jl_is_concrete_type(typ));
+            assert((TIndex == nullptr) == jl_is_concrete_type(typ));
         }
         else {
             assert(isboxed || v.typ == typ || tindex);
         }
     }
     explicit jl_cgval_t() : // undef / unreachable constructor
-        V(NULL),
-        Vboxed(NULL),
-        TIndex(NULL),
-        constant(NULL),
+        V(nullptr),
+        Vboxed(nullptr),
+        TIndex(nullptr),
+        inline_roots(),
+        constant(nullptr),
         typ(jl_bottom_type),
         isboxed(false),
         isghost(true),
@@ -1913,6 +1925,7 @@ struct jl_varinfo_t {
     Instruction *boxroot; // an address, if the var might be in a jl_value_t** stack slot (marked ctx.tbaa().tbaa_const, if appropriate)
     jl_cgval_t value; // a stack slot or constant value
     Value *pTIndex; // i8* stack slot for the value.TIndex tag describing `value.V`
+    AllocaInst *inline_roots; // stack roots for the inline_roots array, if needed
     DILocalVariable *dinfo;
     // if the variable might be used undefined and is not boxed
     // this i1 flag is true when it is defined
@@ -1923,11 +1936,12 @@ struct jl_varinfo_t {
     bool usedUndef;
     bool used;
 
-    jl_varinfo_t(LLVMContext &ctxt) : boxroot(NULL),
+    jl_varinfo_t(LLVMContext &ctxt) : boxroot(nullptr),
                      value(jl_cgval_t()),
-                     pTIndex(NULL),
-                     dinfo(NULL),
-                     defFlag(NULL),
+                     pTIndex(nullptr),
+                     inline_roots(nullptr),
+                     dinfo(nullptr),
+                     defFlag(nullptr),
                      isSA(false),
                      isVolatile(false),
                      isArgument(false),
@@ -1951,7 +1965,7 @@ class jl_codectx_t {
     std::map<int, jl_varinfo_t> phic_slots;
     std::map<int, std::pair<Value*, Value*> > scope_restore;
     SmallVector<jl_cgval_t, 0> SAvalues;
-    SmallVector<std::tuple<jl_cgval_t, BasicBlock *, AllocaInst *, PHINode *, jl_value_t *>, 0> PhiNodes;
+    SmallVector<std::tuple<jl_cgval_t, BasicBlock *, AllocaInst *, PHINode *, SmallVector<PHINode*,0>, jl_value_t *>, 0> PhiNodes;
     SmallVector<bool, 0> ssavalue_assigned;
     SmallVector<int, 0> ssavalue_usecount;
     jl_module_t *module = NULL;
@@ -1966,7 +1980,7 @@ class jl_codectx_t {
     size_t max_world = -1;
     const char *name = NULL;
     StringRef file{};
-    ssize_t *line = NULL;
+    int32_t line = -1;
     Value *spvals_ptr = NULL;
     Value *argArray = NULL;
     Value *argCount = NULL;
@@ -2099,7 +2113,8 @@ jl_aliasinfo_t jl_aliasinfo_t::fromTBAA(jl_codectx_t &ctx, MDNode *tbaa) {
 }
 
 static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL);
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments=nullptr, size_t *args_begin=nullptr);
+static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg,
+        ArrayRef<const char*> ArgNames=None, unsigned nreq=0);
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
 static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
                                      jl_binding_t **pbnd, bool assign, bool alloc);
@@ -2108,6 +2123,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i);
 static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg);
 static Value *get_current_task(jl_codectx_t &ctx);
 static Value *get_current_ptls(jl_codectx_t &ctx);
+static Value *get_scope_field(jl_codectx_t &ctx);
 static Value *get_tls_world_age_field(jl_codectx_t &ctx);
 static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, FunctionCallee theFptr, Value *theF,
@@ -2121,6 +2137,180 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static unsigned julia_alignment(jl_value_t *jt);
+static void recombine_value(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dst, jl_aliasinfo_t const &dst_ai, Align alignment, bool isVolatile);
+
+static void print_stack_crumbs(jl_codectx_t &ctx)
+{
+    errs() << "\n";
+    errs() << "Stacktrace:\n";
+    jl_method_instance_t *caller = ctx.linfo;
+    jl_((jl_value_t*)caller);
+    errs() << "In " << ctx.file << ":" << ctx.line << "\n";
+    while (true) {
+        auto it = ctx.emission_context.enqueuers.find(caller);
+        if (it != ctx.emission_context.enqueuers.end()) {
+            caller = std::get<jl_method_instance_t *>(it->second);
+        } else {
+            break;
+        }
+        if (caller) {
+            if (jl_is_method_instance(caller)) {
+                for (auto it2 = std::get<CallFrames>(it->second).begin(); it2 != (std::prev(std::get<CallFrames>(it->second).end())); ++it2) {
+                    auto frame = *it2;
+                    errs() << std::get<0>(frame) << "<inlined> \n";
+                    errs() << "In " << std::get<1>(frame) << ":" << std::get<unsigned int>(frame) << "\n";
+                }
+                auto &frame = std::get<CallFrames>(it->second).front();
+                jl_((jl_value_t*)caller);
+                errs() << "In " << std::get<1>(frame) << ":" << std::get<unsigned int>(frame) << "\n";
+            }
+        }
+        else
+            break;
+    }
+    abort();
+}
+
+static jl_value_t *StackFrame(
+        jl_value_t *linfo,
+        std::string fn_name,
+        std::string filepath,
+        int32_t lineno,
+        jl_value_t *inlined)
+{
+    jl_value_t *StackFrame = jl_get_global(jl_base_module, jl_symbol("StackFrame"));
+    assert(StackFrame != nullptr);
+
+    jl_value_t *args[7] = {
+        /* func */ (jl_value_t *)jl_symbol(fn_name.c_str()),
+        /* line */ (jl_value_t *)jl_symbol(filepath.c_str()),
+        /* line */ jl_box_int32(lineno),
+        /* linfo */ (jl_value_t *)linfo,
+        /* from_c */ jl_false,
+        /* inlined */ inlined,
+        /* pointer */ jl_box_uint64(0)
+    };
+
+    jl_value_t *frame = nullptr;
+    JL_TRY {
+        frame = jl_apply_generic(StackFrame, args, 7);
+    } JL_CATCH {
+        jl_safe_printf("Error creating stack frame\n");
+    }
+    return frame;
+}
+
+static void push_frames(jl_codectx_t &ctx, jl_method_instance_t *caller, jl_method_instance_t *callee, int no_debug=false)
+{
+    CallFrames frames;
+    auto it = ctx.emission_context.enqueuers.find(callee);
+    if (it != ctx.emission_context.enqueuers.end())
+        return;
+    if (no_debug) { // Used in tojlinvoke
+        frames.push_back({ctx.funcName, "", 0});
+        ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}});
+        return;
+    }
+    auto DL = ctx.builder.getCurrentDebugLocation();
+    auto filename = std::string(DL->getFilename());
+    auto line = DL->getLine();
+    auto fname = std::string(DL->getScope()->getSubprogram()->getName());
+    frames.push_back({fname, filename, line});
+    auto DI = DL.getInlinedAt();
+    while (DI) {
+        auto filename = std::string(DI->getFilename());
+        auto line = DI->getLine();
+        auto fname = std::string(DI->getScope()->getSubprogram()->getName());
+        frames.push_back({fname, filename, line});
+        DI = DI->getInlinedAt();
+    }
+    ctx.emission_context.enqueuers.insert({callee, {caller, std::move(frames)}});
+}
+
+static jl_array_t* build_stack_crumbs(jl_codectx_t &ctx) JL_NOTSAFEPOINT
+{
+    static intptr_t counter = 5;
+    jl_method_instance_t *caller = (jl_method_instance_t*)counter; //nothing serves as a sentinel for the bottom for the stack
+    push_frames(ctx, ctx.linfo, (jl_method_instance_t*)caller);
+    counter++;
+    jl_array_t *out = jl_alloc_array_1d(jl_array_any_type, 0);
+    JL_GC_PUSH1(&out);
+    while (true) {
+        auto it = ctx.emission_context.enqueuers.find(caller);
+        if (it != ctx.emission_context.enqueuers.end()) {
+            caller = std::get<jl_method_instance_t *>(it->second);
+        } else {
+            break;
+        }
+        if (caller) {
+            assert(ctx.emission_context.enqueuers.count(caller) == 1);
+            if (jl_is_method_instance(caller)) {
+                //TODO: Use a subrange when C++20 is a thing
+                for (auto it2 = std::get<CallFrames>(it->second).begin(); it2 != (std::prev(std::get<CallFrames>(it->second).end())); ++it2) {
+                    auto frame = *it2;
+                    jl_value_t *stackframe = StackFrame(jl_nothing, std::get<0>(frame), std::get<1>(frame), std::get<unsigned int>(frame), jl_true);
+                    if (stackframe == nullptr)
+                        print_stack_crumbs(ctx);
+                    jl_array_ptr_1d_push(out, stackframe);
+                }
+                auto &frame = std::get<CallFrames>(it->second).back();
+                jl_value_t *stackframe = StackFrame((jl_value_t *)caller, std::get<0>(frame), std::get<1>(frame), std::get<unsigned int>(frame), jl_false);
+                if (stackframe == nullptr)
+                    print_stack_crumbs(ctx);
+                jl_array_ptr_1d_push(out, stackframe);
+            }
+        }
+        else
+            break;
+    }
+    JL_GC_POP();
+    return out;
+}
+
+static void print_stacktrace(jl_codectx_t &ctx, int trim)
+{
+    jl_task_t *ct = jl_get_current_task();
+    assert(ct);
+
+    // Temporarily operate in the current age
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_get_world_counter();
+    jl_array_t* bt = build_stack_crumbs(ctx);
+    JL_GC_PUSH1(&bt);
+
+    // Call `reinit_stdio` to get TTY IO objects (w/ color)
+    jl_value_t *reinit_stdio = jl_get_global(jl_base_module, jl_symbol("_reinit_stdio"));
+    assert(reinit_stdio);
+    jl_apply_generic(reinit_stdio, nullptr, 0);
+
+    // Show the backtrace
+    jl_value_t *show_backtrace = jl_get_global(jl_base_module, jl_symbol("show_backtrace"));
+    jl_value_t *base_stderr = jl_get_global(jl_base_module, jl_symbol("stderr"));
+    assert(show_backtrace && base_stderr);
+
+    JL_TRY {
+        jl_value_t *args[2] = { base_stderr, (jl_value_t *)bt };
+        jl_apply_generic(show_backtrace, args, 2);
+    } JL_CATCH {
+        jl_printf(JL_STDERR,"Error showing backtrace\n");
+        print_stack_crumbs(ctx);
+    }
+
+    jl_printf(JL_STDERR, "\n\n");
+    JL_GC_POP();
+    ct->world_age = last_age;
+
+    if (trim == JL_TRIM_SAFE) {
+        jl_printf(JL_STDERR,"Aborting compilation due to finding a dynamic dispatch");
+        exit(1);
+    }
+    return;
+}
+
+static int trim_may_error(int trim)
+{
+    return (trim == JL_TRIM_SAFE) || (trim == JL_TRIM_UNSAFE_WARN);
+}
 
 static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G)
 {
@@ -2152,6 +2342,20 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G)
     return cast<GlobalVariable>(local);
 }
 
+static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, size_t byte_offset, const Twine &Name="")
+{
+    auto *gep = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), base, byte_offset);
+    setName(ctx.emission_context, gep, Name);
+    return gep;
+}
+
+static Value *emit_ptrgep(jl_codectx_t &ctx, Value *base, Value *byte_offset, const Twine &Name="")
+{
+    auto *gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), base, byte_offset, Name);
+    setName(ctx.emission_context, gep, Name);
+    return gep;
+}
+
 
 // --- convenience functions for tagging llvm values with julia types ---
 
@@ -2180,15 +2384,37 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con
             gv = get_gv(gvname);
         }
     }
-    assert(gv->getName().startswith(name.str()));
+    assert(gv->getName().starts_with(name.str()));
     assert(val == gv->getInitializer());
     return gv;
 }
 
-static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty)
+static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align)
 {
     ++EmittedAllocas;
-    return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), "", /*InsertBefore=*/ctx.topalloca);
+    return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, align, "", /*InsertBefore=*/ctx.topalloca);
+}
+
+static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, unsigned nb, Align align)
+{
+    // Stupid hack: SROA takes hints from the element type, and will happily split this allocation into lots of unaligned bits
+    // if it cannot find something better to do, which is terrible for performance.
+    // However, if we emit this with an element size equal to the alignment, it will instead split it into aligned chunks
+    // which is great for performance and vectorization.
+    if (alignTo(nb, align) == align.value()) // don't bother with making an array of length 1
+        return emit_static_alloca(ctx, ctx.builder.getIntNTy(align.value() * 8), align);
+    return emit_static_alloca(ctx, ArrayType::get(ctx.builder.getIntNTy(align.value() * 8), alignTo(nb, align) / align.value()), align);
+}
+
+static AllocaInst *emit_static_roots(jl_codectx_t &ctx, unsigned nroots)
+{
+    AllocaInst *staticroots = emit_static_alloca(ctx, ctx.types().T_prjlvalue, Align(sizeof(void*)));
+    staticroots->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nroots));
+    IRBuilder<> builder(ctx.topalloca);
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+    // make sure these are nullptr early from LLVM's perspective, in case it decides to SROA it
+    ai.decorateInst(builder.CreateMemSet(staticroots, builder.getInt8(0), nroots * sizeof(void*), staticroots->getAlign()))->moveAfter(ctx.topalloca);
+    return staticroots;
 }
 
 static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa)
@@ -2202,7 +2428,7 @@ static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *st
     size_t i, np = sty->layout->npointers;
     auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx.builder.getContext());
     for (i = 0; i < np; i++) {
-        Value *fld = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i));
+        Value *fld = emit_ptrgep(ctx, ptr, jl_ptr_offset(sty, i) * sizeof(jl_value_t*));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
         ai.decorateInst(ctx.builder.CreateStore(Constant::getNullValue(T_prjlvalue), fld));
     }
@@ -2238,8 +2464,10 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ)
     if (jl_is_type_type(typ)) {
         assert(is_uniquerep_Type(typ));
         // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort
-        jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+        jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
         constant.constant = jl_tparam0(typ);
+        if (typ == (jl_value_t*)jl_typeofbottom_type->super)
+            constant.isghost = true;
         return constant;
     }
     return jl_cgval_t(typ);
@@ -2252,7 +2480,7 @@ static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_datatype_t *typ)
 static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv)
 {
     jl_value_t *typ;
-    if (jl_is_type(jv)) {
+    if (jl_is_type(jv) && jv != jl_bottom_type) {
         typ = (jl_value_t*)jl_wrap_Type(jv); // TODO: gc-root this?
     }
     else {
@@ -2260,16 +2488,16 @@ static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv)
         if (jl_is_datatype_singleton((jl_datatype_t*)typ))
             return ghostValue(ctx, typ);
     }
-    jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+    jl_cgval_t constant(NULL, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
     constant.constant = jv;
     return constant;
 }
 
 
-static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa)
+static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa, ArrayRef<Value*> inline_roots=None)
 {
     // this enables lazy-copying of immutable values and stack or argument slots
-    jl_cgval_t tagval(v, false, typ, tindex, tbaa);
+    jl_cgval_t tagval(v, false, typ, tindex, tbaa, inline_roots);
     return tagval;
 }
 
@@ -2289,22 +2517,41 @@ static bool valid_as_globalinit(const Value *v) {
 
 static Value *zext_struct(jl_codectx_t &ctx, Value *V);
 
+// TODO: in the future, assume all callers will handle the interior pointers separately, and have
+// have zext_struct strip them out, so we aren't saving those to the stack here causing shadow stores
+// to be necessary too
 static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_t *typ, Value *tindex)
 {
     Value *loc;
     v = zext_struct(ctx, v);
+    Align align(julia_alignment(typ));
     if (valid_as_globalinit(v)) { // llvm can't handle all the things that could be inside a ConstantExpr
         assert(jl_is_concrete_type(typ)); // not legal to have an unboxed abstract type
-        loc = get_pointer_to_constant(ctx.emission_context, cast<Constant>(v), Align(julia_alignment(typ)), "_j_const", *jl_Module);
+        loc = get_pointer_to_constant(ctx.emission_context, cast<Constant>(v), align, "_j_const", *jl_Module);
     }
     else {
-        loc = emit_static_alloca(ctx, v->getType());
-        ctx.builder.CreateStore(v, loc);
+        loc = emit_static_alloca(ctx, v->getType(), align);
+        ctx.builder.CreateAlignedStore(v, loc, align);
     }
     return mark_julia_slot(loc, typ, tindex, ctx.tbaa().tbaa_stack);
 }
 static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v)
 {
+    if (!v.inline_roots.empty()) {
+        //if (v.V == nullptr) {
+        //    AllocaInst *loc = emit_static_roots(ctx, v.inline_roots.size());
+        //    for (size_t i = 0; i < v.inline_roots.counts(); i++)
+        //        ctx.builder.CreateAlignedStore(v.inline_roots[i], emit_ptrgep(ctx, loc, i * sizeof(void*)), Align(sizeof(void*)));
+        //    return mark_julia_slot(loc, v.typ, v.TIndex, ctx.tbaa().tbaa_gcframe);
+        //}
+        Align align(julia_alignment(v.typ));
+        Type *ty = julia_type_to_llvm(ctx, v.typ);
+        AllocaInst *loc = emit_static_alloca(ctx, ty, align);
+        auto tbaa = v.V == nullptr ? ctx.tbaa().tbaa_gcframe : ctx.tbaa().tbaa_stack;
+        auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
+        recombine_value(ctx, v, loc, stack_ai, align, false);
+        return mark_julia_slot(loc, v.typ, v.TIndex, tbaa);
+    }
     if (v.ispointer())
         return v;
     return value_to_pointer(ctx, v.V, v.typ, v.TIndex);
@@ -2317,7 +2564,8 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox
             // replace T::Type{T} with T
             return ghostValue(ctx, typ);
         }
-    } else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) {
+    }
+    else if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) {
         // no need to explicitly load/store a constant/ghost value
         return ghostValue(ctx, typ);
     }
@@ -2325,13 +2573,14 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox
     if (type_is_ghost(T)) {
         return ghostValue(ctx, typ);
     }
-    if (v && !isboxed && v->getType()->isAggregateType() && CountTrackedPointers(v->getType()).count == 0) {
+    if (v && !isboxed && v->getType()->isAggregateType()) {
         // eagerly put this back onto the stack
         // llvm mem2reg pass will remove this if unneeded
-        return value_to_pointer(ctx, v, typ, NULL);
+        if (CountTrackedPointers(v->getType()).count == 0)
+            return value_to_pointer(ctx, v, typ, NULL);
     }
     if (isboxed)
-        return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+        return jl_cgval_t(v, isboxed, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
     return jl_cgval_t(v, typ, NULL);
 }
 
@@ -2366,7 +2615,7 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t &
             if (alwaysboxed) {
                 // discovered that this union-split type must actually be isboxed
                 if (v.Vboxed) {
-                    return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+                    return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots);
                 }
                 else {
                     // type mismatch (there weren't any boxed values in the union)
@@ -2409,7 +2658,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
 {
     assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things");
     if (vi.usedUndef) {
-        vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()));
+        vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()), Align(1));
         setName(ctx.emission_context, vi.defFlag, "isdefined");
         store_def_flag(ctx, vi, false);
     }
@@ -2595,14 +2844,14 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                             decay_derived(ctx, boxv),
                             decay_derived(ctx, slotv));
             }
-            jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa);
+            jl_cgval_t newv = jl_cgval_t(slotv, false, typ, new_tindex, tbaa, v.inline_roots);
             assert(boxv->getType() == ctx.types().T_prjlvalue);
             newv.Vboxed = boxv;
             return newv;
         }
     }
     else {
-        return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+        return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ), None);
     }
     return jl_cgval_t(v, typ, new_tindex);
 }
@@ -2633,7 +2882,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_
             if (skip) {
                 *skip = ctx.builder.CreateNot(emit_exactly_isa(ctx, v, (jl_datatype_t*)typ, true));
             }
-            return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+            return jl_cgval_t(v.Vboxed, true, typ, NULL, best_tbaa(ctx.tbaa(), typ), v.inline_roots);
         }
         if (mustbox_union) {
             // type mismatch: there weren't any boxed values in the union
@@ -2655,7 +2904,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_
             unsigned new_idx = get_box_tindex((jl_datatype_t*)v.typ, typ);
             if (new_idx) {
                 new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), new_idx);
-                if (v.V && !v.ispointer()) {
+                if (v.V && v.inline_roots.empty() && !v.ispointer()) {
                     // TODO: remove this branch once all consumers of v.TIndex understand how to handle a non-ispointer value
                     return jl_cgval_t(value_to_pointer(ctx, v), typ, new_tindex);
                 }
@@ -2679,7 +2928,7 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_
         }
         if (makeboxed) {
             // convert to a simple isboxed value
-            return jl_cgval_t(boxed(ctx, v), true, typ, NULL, best_tbaa(ctx.tbaa(), typ));
+            return mark_julia_type(ctx, boxed(ctx, v), true, typ);
         }
     }
     return jl_cgval_t(v, typ, new_tindex);
@@ -2928,10 +3177,11 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
     if (jl_is_globalref(ex)) {
         s = jl_globalref_name(ex);
         jl_binding_t *b = jl_get_binding(jl_globalref_mod(ex), s);
-        if (b && b->constp) {
+        jl_value_t *v = jl_get_binding_value_if_const(b);
+        if (v) {
             if (b->deprecated)
                 cg_bdw(ctx, s, b);
-            return jl_atomic_load_relaxed(&b->value);
+            return v;
         }
         return NULL;
     }
@@ -2950,10 +3200,11 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                     s = (jl_sym_t*)static_eval(ctx, jl_exprarg(e, 2));
                     if (s && jl_is_symbol(s)) {
                         jl_binding_t *b = jl_get_binding(m, s);
-                        if (b && b->constp) {
+                        jl_value_t *v = jl_get_binding_value_if_const(b);
+                        if (v) {
                             if (b->deprecated)
                                 cg_bdw(ctx, s, b);
-                            return jl_atomic_load_relaxed(&b->value);
+                            return v;
                         }
                     }
                 }
@@ -3190,18 +3441,53 @@ static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val)
 
 static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name, AtomicOrdering order)
 {
-    jl_binding_t *bnd = NULL;
-    Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false, false);
-    if (bp == NULL)
-        return jl_cgval_t();
-    bp = julia_binding_pvalue(ctx, bp);
-    jl_value_t *ty = nullptr;
-    if (bnd) {
-        jl_value_t *v = jl_atomic_load_acquire(&bnd->value); // acquire value for ty
-        if (v != NULL && bnd->constp)
-            return mark_julia_const(ctx, v);
-        ty = jl_atomic_load_relaxed(&bnd->ty);
+    jl_binding_t *bnd = jl_get_module_binding(mod, name, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bnd, ctx.max_world);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+        // try to look this up now.
+        // TODO: This is bad and we'd like to delete it.
+        jl_get_binding(mod, name);
+    }
+    assert(bnd);
+    Value *bp = NULL;
+    // bpart was updated in place - this will change with full partition
+    pku = jl_atomic_load_acquire(&bpart->restriction);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+        // Redo the lookup at runtime
+        bp = julia_binding_gv(ctx, bnd);
+        Value *v = ctx.builder.CreateCall(prepare_call(jlgetbindingvalue_func), { bp });
+        undef_var_error_ifnot(ctx, ctx.builder.CreateIsNotNull(v), name, (jl_value_t*)mod);
+        return mark_julia_type(ctx, v, true, jl_any_type);
+    } else {
+        while (true) {
+            if (!bpart)
+                break;
+            if (!jl_bkind_is_some_import(decode_restriction_kind(pku)))
+                break;
+            if (bnd->deprecated) {
+                cg_bdw(ctx, name, bnd);
+            }
+            bnd = (jl_binding_t*)decode_restriction_value(pku);
+            bpart = jl_get_binding_partition(bnd, ctx.max_world);
+            pku = jl_atomic_load_acquire(&bpart->restriction);
+        }
+        if (bpart && jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            jl_value_t *constval = decode_restriction_value(pku);
+            if (!constval) {
+                undef_var_error_ifnot(ctx, ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), name, (jl_value_t*)mod);
+                return jl_cgval_t();
+            }
+            return mark_julia_const(ctx, constval);
+        }
+    }
+    bp = julia_binding_gv(ctx, bnd);
+    if (bnd->deprecated) {
+        cg_bdw(ctx, name, bnd);
     }
+    assert(decode_restriction_kind(pku) == BINDING_KIND_GLOBAL);
+    jl_value_t *ty = decode_restriction_value(pku);
+    bp = julia_binding_pvalue(ctx, bp);
     if (ty == nullptr)
         ty = (jl_value_t*)jl_any_type;
     return update_julia_type(ctx, emit_checked_var(ctx, bp, name, (jl_value_t*)mod, false, ctx.tbaa().tbaa_binding), ty);
@@ -3214,43 +3500,47 @@ static jl_cgval_t emit_globalop(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *s
 {
     jl_binding_t *bnd = NULL;
     Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true, alloc);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bnd, ctx.max_world);
     if (bp == NULL)
         return jl_cgval_t();
-    if (bnd && !bnd->constp) {
-        jl_value_t *ty = jl_atomic_load_relaxed(&bnd->ty);
-        if (ty != nullptr) {
-            const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" : "setglobalonce!";
-            if (!ismodifyglobal) {
-                // TODO: use typeassert in jl_check_binding_wr too
-                emit_typecheck(ctx, rval, ty, "typeassert");
-                rval = update_julia_type(ctx, rval, ty);
-                if (rval.typ == jl_bottom_type)
-                    return jl_cgval_t();
-            }
-            bool isboxed = true;
-            bool maybe_null = jl_atomic_load_relaxed(&bnd->value) == NULL;
-            return typed_store(ctx,
-                               julia_binding_pvalue(ctx, bp),
-                               rval, cmp, ty,
-                               ctx.tbaa().tbaa_binding,
-                               nullptr,
-                               bp,
-                               isboxed,
-                               Order,
-                               FailOrder,
-                               0,
-                               nullptr,
-                               issetglobal,
-                               isreplaceglobal,
-                               isswapglobal,
-                               ismodifyglobal,
-                               issetglobalonce,
-                               maybe_null,
-                               modifyop,
-                               fname,
-                               mod,
-                               sym);
+    if (bpart) {
+        jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+        if (!jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            jl_value_t *ty = decode_restriction_value(pku);
+            if (ty != nullptr) {
+                const std::string fname = issetglobal ? "setglobal!" : isreplaceglobal ? "replaceglobal!" : isswapglobal ? "swapglobal!" : ismodifyglobal ? "modifyglobal!" : "setglobalonce!";
+                if (!ismodifyglobal) {
+                    // TODO: use typeassert in jl_check_binding_wr too
+                    emit_typecheck(ctx, rval, ty, "typeassert");
+                    rval = update_julia_type(ctx, rval, ty);
+                    if (rval.typ == jl_bottom_type)
+                        return jl_cgval_t();
+                }
+                bool isboxed = true;
+                bool maybe_null = jl_atomic_load_relaxed(&bnd->value) == NULL;
+                return typed_store(ctx,
+                                julia_binding_pvalue(ctx, bp),
+                                rval, cmp, ty,
+                                ctx.tbaa().tbaa_binding,
+                                nullptr,
+                                bp,
+                                isboxed,
+                                Order,
+                                FailOrder,
+                                0,
+                                nullptr,
+                                issetglobal,
+                                isreplaceglobal,
+                                isswapglobal,
+                                ismodifyglobal,
+                                issetglobalonce,
+                                maybe_null,
+                                modifyop,
+                                fname,
+                                mod,
+                                sym);
 
+            }
         }
     }
     Value *m = literal_pointer_val(ctx, (jl_value_t*)mod);
@@ -3374,11 +3664,14 @@ static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty,
     size_t padding_bytes = 0;
     size_t nfields = jl_datatype_nfields(aty);
     size_t total_size = jl_datatype_size(aty);
+    assert(aty->layout->flags.isbitsegal);
     for (size_t i = 0; i < nfields; ++i) {
         size_t offset = jl_field_offset(aty, i);
         size_t fend = i == nfields - 1 ? total_size : jl_field_offset(aty, i + 1);
         size_t fsz = jl_field_size(aty, i);
         jl_datatype_t *fty = (jl_datatype_t*)jl_field_type(aty, i);
+        assert(jl_is_datatype(fty)); // union fields should never reach here
+        assert(fty->layout->flags.isbitsegal);
         if (jl_field_isptr(aty, i) || !fty->layout->flags.haspadding) {
             // The field has no internal padding
             data_bytes += fsz;
@@ -3414,25 +3707,26 @@ static size_t emit_masked_bits_compare(callback &emit_desc, jl_datatype_t *aty,
 static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
 {
     ++EmittedBitsCompares;
+    jl_value_t *argty = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ);
     bool isboxed;
     Type *at = julia_type_to_llvm(ctx, arg1.typ, &isboxed);
-    assert(jl_is_datatype(arg1.typ) && arg1.typ == arg2.typ && !isboxed);
+    assert(jl_is_datatype(arg1.typ) && arg1.typ == (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ) && !isboxed);
 
     if (type_is_ghost(at))
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
 
     if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) {
         Type *at_int = INTT(at, ctx.emission_context.DL);
-        Value *varg1 = emit_unbox(ctx, at_int, arg1, arg1.typ);
-        Value *varg2 = emit_unbox(ctx, at_int, arg2, arg2.typ);
+        Value *varg1 = emit_unbox(ctx, at_int, arg1, argty);
+        Value *varg2 = emit_unbox(ctx, at_int, arg2, argty);
         return ctx.builder.CreateICmpEQ(varg1, varg2);
     }
 
     if (at->isVectorTy()) {
-        jl_svec_t *types = ((jl_datatype_t*)arg1.typ)->types;
+        jl_svec_t *types = ((jl_datatype_t*)argty)->types;
         Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
-        Value *varg1 = emit_unbox(ctx, at, arg1, arg1.typ);
-        Value *varg2 = emit_unbox(ctx, at, arg2, arg2.typ);
+        Value *varg1 = emit_unbox(ctx, at, arg1, argty);
+        Value *varg2 = emit_unbox(ctx, at, arg2, argty);
         for (size_t i = 0, l = jl_svec_len(types); i < l; i++) {
             jl_value_t *fldty = jl_svecref(types, i);
             Value *subAns, *fld1, *fld2;
@@ -3447,12 +3741,12 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
     }
 
     if (at->isAggregateType()) { // Struct or Array
-        jl_datatype_t *sty = (jl_datatype_t*)arg1.typ;
+        jl_datatype_t *sty = (jl_datatype_t*)argty;
         size_t sz = jl_datatype_size(sty);
         if (sz > 512 && !sty->layout->flags.haspadding && sty->layout->flags.isbitsegal) {
-            Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) :
+            Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) :
                 value_to_pointer(ctx, arg1).V;
-            Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) :
+            Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? data_pointer(ctx, arg2) :
                 value_to_pointer(ctx, arg2).V;
             varg1 = emit_pointer_from_objref(ctx, varg1);
             varg2 = emit_pointer_from_objref(ctx, varg2);
@@ -3487,11 +3781,9 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
             return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
         }
         else if (sz > 512 && jl_struct_try_layout(sty) && sty->layout->flags.isbitsegal) {
-            Type *TInt8 = getInt8Ty(ctx.builder.getContext());
-            Type *TInt1 = getInt1Ty(ctx.builder.getContext());
-            Value *varg1 = arg1.ispointer() ? data_pointer(ctx, arg1) :
+            Value *varg1 = arg1.inline_roots.empty() && arg1.ispointer() ? data_pointer(ctx, arg1) :
                 value_to_pointer(ctx, arg1).V;
-            Value *varg2 = arg2.ispointer() ? data_pointer(ctx, arg2) :
+            Value *varg2 = arg2.inline_roots.empty() && arg2.ispointer() ? data_pointer(ctx, arg2) :
                 value_to_pointer(ctx, arg2).V;
             varg1 = emit_pointer_from_objref(ctx, varg1);
             varg2 = emit_pointer_from_objref(ctx, varg2);
@@ -3507,8 +3799,8 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                 Value *ptr1 = varg1;
                 Value *ptr2 = varg2;
                 if (desc.offset != 0) {
-                    ptr1 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, ptr1, desc.offset);
-                    ptr2 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, ptr2, desc.offset);
+                    ptr1 = emit_ptrgep(ctx, ptr1, desc.offset);
+                    ptr2 = emit_ptrgep(ctx, ptr2, desc.offset);
                 }
 
                 Value *new_ptr1 = ptr1;
@@ -3518,7 +3810,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                 PHINode *answerphi = nullptr;
                 if (desc.nrepeats != 1) {
                     // Set up loop
-                    endptr1 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes));;
+                    endptr1 = emit_ptrgep(ctx, ptr1, desc.nrepeats * (desc.data_bytes + desc.padding_bytes));;
 
                     BasicBlock *currBB = ctx.builder.GetInsertBlock();
                     loopBB = BasicBlock::Create(ctx.builder.getContext(), "egal_loop", ctx.f);
@@ -3526,6 +3818,7 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                     ctx.builder.CreateBr(loopBB);
 
                     ctx.builder.SetInsertPoint(loopBB);
+                    Type *TInt1 = getInt1Ty(ctx.builder.getContext());
                     answerphi = ctx.builder.CreatePHI(TInt1, 2);
                     answerphi->addIncoming(answer ? answer : ConstantInt::get(TInt1, 1), currBB);
                     answer = answerphi;
@@ -3533,11 +3826,11 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                     PHINode *itr1 = ctx.builder.CreatePHI(ptr1->getType(), 2);
                     PHINode *itr2 = ctx.builder.CreatePHI(ptr2->getType(), 2);
 
-                    new_ptr1 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, itr1, desc.data_bytes + desc.padding_bytes);
+                    new_ptr1 = emit_ptrgep(ctx, itr1, desc.data_bytes + desc.padding_bytes);
                     itr1->addIncoming(ptr1, currBB);
                     itr1->addIncoming(new_ptr1, loopBB);
 
-                    Value *new_ptr2 = ctx.builder.CreateConstInBoundsGEP1_32(TInt8, itr2, desc.data_bytes + desc.padding_bytes);
+                    Value *new_ptr2 = emit_ptrgep(ctx, itr2, desc.data_bytes + desc.padding_bytes);
                     itr2->addIncoming(ptr2, currBB);
                     itr2->addIncoming(new_ptr2, loopBB);
 
@@ -3616,8 +3909,8 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     if (arg1.constant && arg2.constant)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), jl_egal(arg1.constant, arg2.constant));
 
-    jl_value_t *rt1 = arg1.typ;
-    jl_value_t *rt2 = arg2.typ;
+    jl_value_t *rt1 = (arg1.constant ? jl_typeof(arg1.constant) : arg1.typ);
+    jl_value_t *rt2 = (arg2.constant ? jl_typeof(arg2.constant) : arg2.typ);
     if (jl_is_concrete_type(rt1) && jl_is_concrete_type(rt2) && !jl_is_kind(rt1) && !jl_is_kind(rt2) && rt1 != rt2) {
         // disjoint concrete leaf types are never equal (quick test)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
@@ -3652,8 +3945,10 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     if (jl_type_intersection(rt1, rt2) == (jl_value_t*)jl_bottom_type) // types are disjoint (exhaustive test)
         return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
 
-    bool justbits1 = jl_is_concrete_immutable(rt1);
-    bool justbits2 = jl_is_concrete_immutable(rt2);
+    // can compare any concrete immutable by bits, except for UnionAll
+    // which has a special non-bits based egal
+    bool justbits1 = jl_is_concrete_immutable(rt1) && !jl_is_kind(rt1);
+    bool justbits2 = jl_is_concrete_immutable(rt2) && !jl_is_kind(rt2);
     if (justbits1 || justbits2) { // whether this type is unique'd by value
         return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* {
             jl_datatype_t *typ = (jl_datatype_t*)(justbits1 ? rt1 : rt2);
@@ -4019,7 +4314,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen);
             data = ctx.builder.CreateInBoundsGEP(AT, data, idx0);
         }
-        ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0);
+        ptindex = emit_ptrgep(ctx, ptindex, idx0);
         *ret = union_store(ctx, data, ptindex, val, cmp, ety,
             ctx.tbaa().tbaa_arraybuf, ctx.tbaa().tbaa_arrayselbyte,
             Order, FailOrder,
@@ -4034,7 +4329,7 @@ static bool emit_f_opmemory(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             assert(ptr);
             lock = ptr;
             // ptr += sizeof(lock);
-            ptr = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+            ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
         }
         Value *data_owner = NULL; // owner object against which the write barrier must check
         if (isboxed || layout->first_ptr >= 0) { // if elements are just bits, don't need a write barrier
@@ -4149,9 +4444,15 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 #ifdef _P64
                 nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
 #endif
-                Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs));
+                Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*));
                 Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
                 *ret = mark_julia_type(ctx, r, true, jl_any_type);
+                if (trim_may_error(ctx.params->trim)) {
+                    // if we know the return type, we can assume the result is of that type
+                    errs() << "ERROR: Dynamic call to Core._apply_iterate detected\n";
+                    errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+                    print_stacktrace(ctx, ctx.params->trim);
+                }
                 return true;
             }
         }
@@ -4299,7 +4600,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     ptindex = ctx.builder.CreateInBoundsGEP(AT, data, mlen);
                     data = ctx.builder.CreateInBoundsGEP(AT, data, idx0);
                 }
-                ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx0);
+                ptindex = emit_ptrgep(ctx, ptindex, idx0);
                 size_t elsz_c = 0, al_c = 0;
                 int union_max = jl_islayout_inline(ety, &elsz_c, &al_c);
                 assert(union_max && LLT_ALIGN(elsz_c, al_c) == elsz && al_c == al);
@@ -4312,7 +4613,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     assert(ptr);
                     lock = ptr;
                     // ptr += sizeof(lock);
-                    ptr = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+                    ptr = emit_ptrgep(ctx, ptr, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
                     emit_lockstate_value(ctx, lock, true);
                 }
                 *ret = typed_load(ctx, ptr, nullptr, ety,
@@ -4403,10 +4704,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 if (needlock) {
                     // n.b. no actual lock acquire needed, as the check itself only needs to load a single pointer and check for null
                     // elem += sizeof(lock);
-                    elem = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
+                    elem = emit_ptrgep(ctx, elem, LLT_ALIGN(sizeof(jl_mutex_t), JL_SMALL_BYTE_ALIGNMENT));
                 }
                 if (!isboxed)
-                    elem = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, elem, layout->first_ptr);
+                    elem = emit_ptrgep(ctx, elem, layout->first_ptr * sizeof(void*));
                 // emit this using the same type as jl_builtin_memoryrefget
                 // so that LLVM may be able to load-load forward them and fold the result
                 auto tbaa = isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf;
@@ -4494,7 +4795,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
                         Value *valen = emit_n_varargs(ctx);
                         jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check (it only checks the `.V` field)
-                                ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(ctx.types().T_size, ctx.nReqArgs)),
+                                emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*)),
                                 NULL, NULL);
                         Value *idx = emit_unbox(ctx, ctx.types().T_size, fld, (jl_value_t*)jl_long_type);
                         idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck);
@@ -4535,34 +4836,33 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     // For tuples, we can emit code even if we don't know the exact
                     // type (e.g. because we don't know the length). This is possible
                     // as long as we know that all elements are of the same (leaf) type.
-                    if (obj.ispointer()) {
-                        if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
-                            emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
-                            *ret = jl_cgval_t(); // unreachable
-                            return true;
-                        }
-                        // Determine which was the type that was homogeneous
-                        jl_value_t *jt = jl_tparam0(utt);
-                        if (jl_is_vararg(jt))
-                            jt = jl_unwrap_vararg(jt);
-                        assert(jl_is_datatype(jt));
-                        // This is not necessary for correctness, but allows to omit
-                        // the extra code for getting the length of the tuple
-                        if (!bounds_check_enabled(ctx, boundscheck)) {
-                            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
-                        }
-                        else {
-                            vidx = emit_bounds_check(ctx, obj, (jl_value_t*)obj.typ, vidx,
-                                emit_datatype_nfields(ctx, emit_typeof(ctx, obj, false, false)),
-                                jl_true);
-                        }
-                        bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0);
-                        Value *ptr = data_pointer(ctx, obj);
-                        *ret = typed_load(ctx, ptr, vidx,
-                                isboxed ? (jl_value_t*)jl_any_type : jt,
-                                obj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false);
+                    jl_cgval_t ptrobj = obj.isboxed ? obj : value_to_pointer(ctx, obj);
+                    if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+                        emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+                        *ret = jl_cgval_t(); // unreachable
                         return true;
                     }
+                    // Determine which was the type that was homogeneous
+                    jl_value_t *jt = jl_tparam0(utt);
+                    if (jl_is_vararg(jt))
+                        jt = jl_unwrap_vararg(jt);
+                    assert(jl_is_datatype(jt));
+                    // This is not necessary for correctness, but allows to omit
+                    // the extra code for getting the length of the tuple
+                    if (!bounds_check_enabled(ctx, boundscheck)) {
+                        vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(ctx.types().T_size, 1));
+                    }
+                    else {
+                        vidx = emit_bounds_check(ctx, ptrobj, (jl_value_t*)ptrobj.typ, vidx,
+                            emit_datatype_nfields(ctx, emit_typeof(ctx, ptrobj, false, false)),
+                            jl_true);
+                    }
+                    bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0);
+                    Value *ptr = data_pointer(ctx, ptrobj);
+                    *ret = typed_load(ctx, ptr, vidx,
+                            isboxed ? (jl_value_t*)jl_any_type : jt,
+                            ptrobj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false);
+                    return true;
                 }
 
                 // Unknown object, but field known to be integer
@@ -4839,12 +5139,17 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) {
             Value *fldv;
             size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*);
-            if (obj.ispointer()) {
+            if (!obj.inline_roots.empty()) {
+                auto offsets = split_value_field(stt, fieldidx);
+                assert(offsets.second >= 0);
+                fldv = obj.inline_roots[offsets.second];
+            }
+            else if (obj.ispointer()) {
                 auto tbaa = best_field_tbaa(ctx, obj, stt, fieldidx, offs);
                 if (!jl_field_isptr(stt, fieldidx))
                     offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr;
                 Value *ptr = data_pointer(ctx, obj);
-                Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, ptr, offs);
+                Value *addr = emit_ptrgep(ctx, ptr, offs * sizeof(jl_value_t*));
                 // emit this using the same type as emit_getfield_knownidx
                 // so that LLVM may be able to load-load forward them and fold the result
                 jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa);
@@ -4872,6 +5177,14 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         return true;
     }
 
+    else if (f == jl_builtin_current_scope && (nargs == 0)) {
+        jl_aliasinfo_t scope_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        Instruction *v = scope_ai.decorateInst(
+            ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, get_scope_field(ctx), ctx.types().alignof_ptr));
+        *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt);
+        return true;
+    }
+
     else if (f == jl_builtin_donotdelete) {
         // For now we emit this as a vararg call to the builtin
         // (which doesn't look at the arguments). In the future,
@@ -4939,46 +5252,29 @@ static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction<> *theFptr, Value
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, trampoline);
 }
 
-static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
-                                          ArrayRef<jl_cgval_t> argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, jl_returninfo_t &returninfo, jl_code_instance_t *fromexternal,
+                                          ArrayRef<jl_cgval_t> argv, size_t nargs)
 {
     ++EmittedSpecfunCalls;
     // emit specialized call site
     bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg);
     FunctionType *cft = returninfo.decl.getFunctionType();
-    *cc = returninfo.cc;
-    *return_roots = returninfo.return_roots;
-
     size_t nfargs = cft->getNumParams();
     SmallVector<Value *, 0> argvals(nfargs);
     unsigned idx = 0;
     AllocaInst *result = nullptr;
-    switch (returninfo.cc) {
-    case jl_returninfo_t::Boxed:
-    case jl_returninfo_t::Register:
-    case jl_returninfo_t::Ghosts:
-        break;
-    case jl_returninfo_t::SRet:
-        result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
-        #if JL_LLVM_VERSION < 170000
-        assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-        #endif
-        argvals[idx] = result;
-        idx++;
-        break;
-    case jl_returninfo_t::Union:
-        result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes));
+
+    if (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union) {
+        result = emit_static_alloca(ctx, returninfo.union_bytes, Align(returninfo.union_align));
         setName(ctx.emission_context, result, "sret_box");
-        if (returninfo.union_align > 1)
-            result->setAlignment(Align(returninfo.union_align));
         argvals[idx] = result;
         idx++;
-        break;
     }
 
+    AllocaInst *return_roots = nullptr;
     if (returninfo.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots));
+        assert(returninfo.cc == jl_returninfo_t::SRet);
+        return_roots = emit_static_roots(ctx, returninfo.return_roots);
         argvals[idx] = return_roots;
         idx++;
     }
@@ -4989,18 +5285,30 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
     for (size_t i = 0; i < nargs; i++) {
         jl_value_t *jt = jl_nth_slot_type(specTypes, i);
         // n.b.: specTypes is required to be a datatype by construction for specsig
-        jl_cgval_t arg = argv[i];
         if (is_opaque_closure && i == 0) {
-            // Special optimization for opaque closures: We know that specsig opaque
-            // closures don't look at their type tag (they are fairly quickly discarded
-            // for their environments). Therefore, we can just pass these as a pointer,
-            // rather than a boxed value.
-            arg = value_to_pointer(ctx, arg);
-            argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
-        }
-        else if (is_uniquerep_Type(jt)) {
+            // Special implementation for opaque closures: their jt and thus
+            // julia_type_to_llvm values are likely wrong, so override the
+            // behavior here to directly pass the expected pointer based instead
+            // just on passing arg as a pointer
+            jl_cgval_t arg = argv[i];
+            if (arg.isghost) {
+                argvals[idx] = Constant::getNullValue(ctx.builder.getPtrTy(AddressSpace::Derived));
+            }
+            else {
+                if (!arg.isboxed)
+                    arg = value_to_pointer(ctx, arg);
+                argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
+            }
+            idx++;
             continue;
-        } else {
+        }
+        jl_cgval_t arg = update_julia_type(ctx, argv[i], jt);
+        if (arg.typ == jl_bottom_type)
+            return jl_cgval_t();
+        if (is_uniquerep_Type(jt)) {
+            continue;
+        }
+        else {
             bool isboxed = deserves_argbox(jt);
             Type *et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
             if (type_is_ghost(et))
@@ -5010,9 +5318,24 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
                 argvals[idx] = boxed(ctx, arg);
             }
             else if (et->isAggregateType()) {
-                arg = value_to_pointer(ctx, arg);
-                // can lazy load on demand, no copy needed
-                argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
+                auto tracked = CountTrackedPointers(et);
+                if (tracked.count && !tracked.all) {
+                    Value *val = arg.V;
+                    SmallVector<Value*,0> roots(arg.inline_roots);
+                    if (roots.empty())
+                        std::tie(val, roots) = split_value(ctx, arg, Align(julia_alignment(jt)));
+                    AllocaInst *proots = emit_static_roots(ctx, roots.size());
+                    for (size_t i = 0; i < roots.size(); i++)
+                        ctx.builder.CreateAlignedStore(roots[i], emit_ptrgep(ctx, proots, i * sizeof(void*)), Align(sizeof(void*)));
+                    assert(val);
+                    argvals[idx++] = decay_derived(ctx, val);
+                    argvals[idx] = proots;
+                }
+                else {
+                    if (!arg.isboxed)
+                        arg = value_to_pointer(ctx, arg);
+                    argvals[idx] = decay_derived(ctx, data_pointer(ctx, arg));
+                }
             }
             else {
                 Value *val = emit_unbox(ctx, et, arg, jt);
@@ -5058,7 +5381,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
             break;
         case jl_returninfo_t::SRet:
             assert(result);
-            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack);
+            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_gcframe, load_gc_roots(ctx, return_roots, returninfo.return_roots));
             break;
         case jl_returninfo_t::Union: {
             Value *box = ctx.builder.CreateExtractValue(call, 0);
@@ -5067,7 +5390,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
                 ctx.builder.CreateICmpEQ(
                         ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
-                decay_derived(ctx, argvals[0]),
+                decay_derived(ctx, result),
                 decay_derived(ctx, box)
             );
             retval = mark_julia_slot(derived,
@@ -5081,6 +5404,19 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos
             retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack);
             break;
     }
+    return retval;
+}
+
+static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_closure, jl_value_t *specTypes, jl_value_t *jlretty, llvm::Value *callee, StringRef specFunctionObject, jl_code_instance_t *fromexternal,
+                                          ArrayRef<jl_cgval_t> argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *nreturn_roots, jl_value_t *inferred_retty)
+{
+    ++EmittedSpecfunCalls;
+    // emit specialized call site
+    bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
+    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, callee, specFunctionObject, specTypes, jlretty, is_opaque_closure, gcstack_arg);
+    *cc = returninfo.cc;
+    *nreturn_roots = returninfo.return_roots;
+    jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, specTypes, jlretty, returninfo, fromexternal, argv, nargs);
     // see if inference has a different / better type for the call than the lambda
     return update_julia_type(ctx, retval, inferred_retty);
 }
@@ -5224,12 +5560,25 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, ArrayR
                     if (need_to_emit) {
                         Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
                         ctx.call_targets[codeinst] = {cc, return_roots, trampoline_decl, specsig};
+                        if (trim_may_error(ctx.params->trim))
+                            push_frames(ctx, ctx.linfo, mi);
                     }
                 }
             }
         }
     }
     if (!handled) {
+        if (trim_may_error(ctx.params->trim)) {
+            if (lival.constant) {
+                arraylist_push(&new_invokes, lival.constant);
+                push_frames(ctx, ctx.linfo, (jl_method_instance_t*)lival.constant);
+            } else {
+                errs() << "Dynamic call to unknown function";
+                errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+
+                print_stacktrace(ctx, ctx.params->trim);
+            }
+        }
         Value *r = emit_jlcall(ctx, jlinvoke_func, boxed(ctx, lival), argv, nargs, julia_call2);
         result = mark_julia_type(ctx, r, true, rt);
     }
@@ -5289,7 +5638,12 @@ static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_
             return mark_julia_type(ctx, oldnew, true, rt);
         }
     }
-
+    if (trim_may_error(ctx.params->trim)) {
+        errs() << "ERROR: dynamic invoke modify call to";
+        jl_(args[0]);
+        errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+        print_stacktrace(ctx, ctx.params->trim);
+    }
     // emit function and arguments
     Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, julia_call);
     return mark_julia_type(ctx, callval, true, rt);
@@ -5359,10 +5713,15 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
             bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex, is_promotable);
             if (handled)
                 return result;
-
+            jl_fptr_args_t builtin_fptr = jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant));
             // special case for some known builtin not handled by emit_builtin_call
-            auto it = builtin_func_map().find(jl_get_builtin_fptr((jl_datatype_t*)jl_typeof(f.constant)));
+            auto it = builtin_func_map().find(builtin_fptr);
             if (it != builtin_func_map().end()) {
+                if (trim_may_error(ctx.params->trim) && may_dispatch_builtins().count(builtin_fptr)) {
+                    errs() << "ERROR: Dynamic call to builtin" << jl_symbol_name(((jl_datatype_t*)jl_typeof(f.constant))->name->name);
+                    errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+                    print_stacktrace(ctx, ctx.params->trim);
+                }
                 Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), ArrayRef<jl_cgval_t>(argv).drop_front(), nargs - 1, julia_call);
                 setName(ctx.emission_context, ret, it->second->name + "_ret");
                 return mark_julia_type(ctx, ret, true, rt);
@@ -5373,7 +5732,7 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
         JuliaFunction<> *cc;
         if (f.typ == (jl_value_t*)jl_intrinsic_type) {
             fptr = prepare_call(jlintrinsic_func);
-            F = f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V;
+            F = f.inline_roots.empty() && f.ispointer() ? data_pointer(ctx, f) : value_to_pointer(ctx, f).V;
             F = decay_derived(ctx, F);
             cc = julia_call3;
         }
@@ -5401,7 +5760,79 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt, bo
             }
         }
     }
+    int failed_dispatch = !argv[0].constant;
+    if (ctx.params->trim != JL_TRIM_NO) {
+        size_t min_valid = 1;
+        size_t max_valid = ~(size_t)0;
+        size_t latest_world = jl_get_world_counter(); // TODO: marshal the world age of the compilation here.
+
+        // Find all methods matching the call signature
+        jl_array_t *matches = NULL;
+        jl_value_t *tup = NULL;
+        JL_GC_PUSH2(&tup, &matches);
+        if (!failed_dispatch) {
+            SmallVector<jl_value_t*> argtypes;
+            for (auto& arg: argv)
+                argtypes.push_back(arg.typ);
+            tup = jl_apply_tuple_type_v(argtypes.data(), argtypes.size());
+            matches = (jl_array_t*)jl_matching_methods((jl_tupletype_t*)tup, jl_nothing, 10 /*TODO: make global*/, 1,
+                                                latest_world, &min_valid, &max_valid, NULL);
+            if ((jl_value_t*)matches == jl_nothing)
+                failed_dispatch = 1;
+        }
+
+        // Expand each matching method to its unique specialization, if it has exactly one
+        if (!failed_dispatch) {
+            size_t k;
+            size_t len = new_invokes.len;
+            for (k = 0; k < jl_array_nrows(matches); k++) {
+                jl_method_match_t *match = (jl_method_match_t *)jl_array_ptr_ref(matches, k);
+                jl_method_instance_t *mi = jl_method_match_to_mi(match, latest_world, min_valid, max_valid, 0);
+                if (!mi) {
+                    if (jl_array_nrows(matches) == 1) {
+                        // if the method match is not compileable, but there is only one, fall back to
+                        // unspecialized implementation
+                        mi = jl_get_unspecialized(match->method);
+                    }
+                    else {
+                        new_invokes.len = len;
+                        failed_dispatch = 1;
+                        break;
+                    }
+                }
+                arraylist_push(&new_invokes, mi);
+            }
+        }
+        JL_GC_POP();
+    }
 
+    if (failed_dispatch && trim_may_error(ctx.params->trim)) {
+        errs() << "Dynamic call to ";
+        jl_jmp_buf *old_buf = jl_get_safe_restore();
+        jl_jmp_buf buf;
+        jl_set_safe_restore(&buf);
+        if (!jl_setjmp(buf, 0)) {
+            jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)args[0]);
+            jl_printf((JL_STREAM*)STDERR_FILENO,"(");
+            for (size_t i = 1; i < nargs; ++i) {
+                jl_value_t *typ = argv[i].typ;
+                if (!jl_is_concrete_type(typ)) // Print type in red
+                    jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[31m");
+                jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)argv[i].typ);
+                if (!jl_is_concrete_type(typ))
+                    jl_printf((JL_STREAM*)STDERR_FILENO, "\x1b[0m");
+                if (i != nargs-1)
+                    jl_printf((JL_STREAM*)STDERR_FILENO,", ");
+            }
+            jl_printf((JL_STREAM*)STDERR_FILENO,")\n");
+        }
+        else {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "\n!!! ERROR while printing error -- ABORTING !!!\n");
+        }
+        jl_set_safe_restore(old_buf);
+        errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+        print_stacktrace(ctx, ctx.params->trim);
+    }
     // emit function and arguments
     Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, n_generic_args, julia_call);
     return mark_julia_type(ctx, callval, true, rt);
@@ -5432,16 +5863,20 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
                                      jl_binding_t **pbnd, bool assign, bool alloc)
 {
     jl_binding_t *b = jl_get_module_binding(m, s, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, ctx.max_world);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
     if (assign) {
-        if (jl_atomic_load_relaxed(&b->owner) == NULL)
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
             // not yet declared
             b = NULL;
     }
     else {
-        b = jl_atomic_load_relaxed(&b->owner);
-        if (b == NULL)
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
             // try to look this up now
             b = jl_get_binding(m, s);
+            bpart = jl_get_binding_partition(b, ctx.max_world);
+        }
+        pku = jl_walk_binding_inplace(&b, &bpart, ctx.max_world);
     }
     if (b == NULL) {
         // var not found. switch to delayed lookup.
@@ -5482,7 +5917,7 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
         return p;
     }
     if (assign) {
-        if (jl_atomic_load_relaxed(&b->owner) != b) {
+        if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
             // this will fail at runtime, so defer to the runtime to create the error
             ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func),
                     { literal_pointer_val(ctx, (jl_value_t*)m),
@@ -5524,10 +5959,7 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
         }
     }
     assert(ctx.spvals_ptr != NULL);
-    Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-            ctx.types().T_prjlvalue,
-            ctx.spvals_ptr,
-            i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
+    Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t));
     jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
     Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
     setName(ctx.emission_context, sp, "sparam");
@@ -5580,10 +6012,7 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_i
             }
         }
         assert(ctx.spvals_ptr != NULL);
-        Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-                ctx.types().T_prjlvalue,
-                ctx.spvals_ptr,
-                i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
+        Value *bp = emit_ptrgep(ctx, ctx.spvals_ptr, i * sizeof(jl_value_t*) + sizeof(jl_svec_t));
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
         Value *sp = ai.decorateInst(ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
         isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp, false, true), emit_tagfrom(ctx, jl_tvar_type));
@@ -5601,8 +6030,10 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_i
             name = (jl_sym_t*)sym;
         }
         jl_binding_t *bnd = allow_import ? jl_get_binding(modu, name) : jl_get_module_binding(modu, name, 0);
-        if (bnd && jl_atomic_load_relaxed(&bnd->owner) == bnd) {
-            if (jl_atomic_load_acquire(&bnd->value) != NULL && bnd->constp)
+        jl_binding_partition_t *bpart = jl_get_binding_partition(bnd, ctx.min_world);
+        jl_ptr_kind_union_t pku = bpart ? jl_atomic_load_relaxed(&bpart->restriction) : encode_restriction(NULL, BINDING_KIND_GUARD);
+        if (decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            if (jl_get_binding_value_if_const(bnd))
                 return mark_julia_const(ctx, jl_true);
             Value *bp = julia_binding_gv(ctx, bnd);
             bp = julia_binding_pvalue(ctx, bp);
@@ -5625,42 +6056,53 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym, int allow_i
 }
 
 static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname) {
-    jl_value_t *typ = vi.value.typ;
     jl_cgval_t v;
     Value *isnull = NULL;
     if (vi.boxroot == NULL || vi.pTIndex != NULL) {
-        if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !vi.value.V) {
+        if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !(vi.value.V || vi.inline_roots)) {
             v = vi.value;
             if (vi.pTIndex)
                 v.TIndex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1));
         }
         else {
             // copy value to a non-mutable (non-volatile SSA) location
-            AllocaInst *varslot = cast<AllocaInst>(vi.value.V);
-            setName(ctx.emission_context, varslot, jl_symbol_name(varname));
-            Type *T = varslot->getAllocatedType();
-            assert(!varslot->isArrayAllocation() && "variables not expected to be VLA");
-            AllocaInst *ssaslot = cast<AllocaInst>(varslot->clone());
-            setName(ctx.emission_context, ssaslot, jl_symbol_name(varname) + StringRef(".ssa"));
-            ssaslot->insertAfter(varslot);
-            if (vi.isVolatile) {
-                Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot,
-                        varslot->getAlign(),
-                        true);
-                ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign());
-            }
-            else {
-                const DataLayout &DL = jl_Module->getDataLayout();
-                uint64_t sz = DL.getTypeStoreSize(T);
-                emit_memcpy(ctx, ssaslot, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), vi.value, sz, ssaslot->getAlign().value(), varslot->getAlign().value());
+            // since this might be a union slot, the most convenient approach to copying
+            // is to move the whole alloca chunk
+            AllocaInst *ssaslot = nullptr;
+            if (vi.value.V) {
+                auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+                AllocaInst *varslot = cast<AllocaInst>(vi.value.V);
+                Type *T = varslot->getAllocatedType();
+                assert(!varslot->isArrayAllocation() && "variables not expected to be VLA");
+                ssaslot = cast<AllocaInst>(varslot->clone());
+                setName(ctx.emission_context, ssaslot, varslot->getName() + StringRef(".ssa"));
+                ssaslot->insertAfter(varslot);
+                if (vi.isVolatile) {
+                    Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot, varslot->getAlign(), true);
+                    stack_ai.decorateInst(ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign()));
+                }
+                else {
+                    const DataLayout &DL = jl_Module->getDataLayout();
+                    uint64_t sz = DL.getTypeStoreSize(T);
+                    emit_memcpy(ctx, ssaslot, stack_ai, vi.value, sz, ssaslot->getAlign(), varslot->getAlign());
+                }
             }
             Value *tindex = NULL;
             if (vi.pTIndex)
                 tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1), vi.isVolatile);
-            v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack);
+            v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa().tbaa_stack, None);
+        }
+        if (vi.inline_roots) {
+            AllocaInst *varslot = vi.inline_roots;
+            size_t nroots = cast<ConstantInt>(varslot->getArraySize())->getZExtValue();
+            auto T_prjlvalue = varslot->getAllocatedType();
+            if (auto AT = dyn_cast<ArrayType>(T_prjlvalue)) {
+                nroots *= AT->getNumElements();
+                T_prjlvalue = AT->getElementType();
+            }
+            assert(T_prjlvalue == ctx.types().T_prjlvalue);
+            v.inline_roots = load_gc_roots(ctx, varslot, nroots, vi.isVolatile);
         }
-        if (vi.boxroot == NULL)
-            v = update_julia_type(ctx, v, typ);
         if (vi.usedUndef) {
             assert(vi.defFlag);
             isnull = ctx.builder.CreateAlignedLoad(getInt1Ty(ctx.builder.getContext()), vi.defFlag, Align(1), vi.isVolatile);
@@ -5671,7 +6113,7 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
         Value *box_isnull = NULL;
         if (vi.usedUndef)
             box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue));
-        maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, typ);
+        maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, vi.value.typ);
         if (vi.pTIndex) {
             // value is either boxed in the stack slot, or unboxed in value
             // as indicated by testing (pTIndex & UNION_BOX_MARKER)
@@ -5680,15 +6122,14 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
                         ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             if (vi.usedUndef)
                 isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull);
-            if (v.V) { // v.V will be null if it is a union of all ghost values
+            if (v.V) // v.V will be null if it is a union of all ghost values
                 v.V = ctx.builder.CreateSelect(load_unbox, decay_derived(ctx, v.V), decay_derived(ctx, boxed));
-            } else
+            else
                 v.V = boxed;
             v.Vboxed = boxed;
-            v = update_julia_type(ctx, v, typ);
         }
         else {
-            v = mark_julia_type(ctx, boxed, true, typ);
+            v = mark_julia_type(ctx, boxed, true, vi.value.typ);
             if (vi.usedUndef)
                 isnull = box_isnull;
         }
@@ -5720,48 +6161,27 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
         store_def_flag(ctx, vi, true);
 
     if (!vi.value.constant) { // check that this is not a virtual store
-        assert(vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL));
+        assert(vi.inline_roots || vi.value.ispointer() || (vi.pTIndex && vi.value.V == NULL));
         // store value
-        if (vi.value.V == NULL) {
-            // all ghost values in destination - nothing to copy or store
-        }
-        else if (rval_info.constant || !rval_info.ispointer()) {
-            if (rval_info.isghost) {
-                // all ghost values in source - nothing to copy or store
-            }
-            else {
-                if (rval_info.typ != vi.value.typ && !vi.pTIndex && !rval_info.TIndex) {
-                    // isbits cast-on-assignment is invalid. this branch should be dead-code.
-                    CreateTrap(ctx.builder);
-                }
-                else {
-                    Value *dest = vi.value.V;
-                    if (vi.pTIndex) // TODO: use lifetime-end here instead
-                        ctx.builder.CreateStore(UndefValue::get(cast<AllocaInst>(vi.value.V)->getAllocatedType()), vi.value.V);
-                    Type *store_ty = julia_type_to_llvm(ctx, rval_info.constant ? jl_typeof(rval_info.constant) : rval_info.typ);
-                    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
-                    ai.decorateInst(ctx.builder.CreateStore(
-                                      emit_unbox(ctx, store_ty, rval_info, rval_info.typ),
-                                      dest,
-                                      vi.isVolatile));
-                }
-            }
-        }
-        else {
-            if (vi.pTIndex == NULL) {
-                assert(jl_is_concrete_type(vi.value.typ));
-                // Sometimes we can get into situations where the LHS and RHS
-                // are the same slot. We're not allowed to memcpy in that case
-                // due to LLVM bugs.
-                // This check should probably mostly catch the relevant situations.
-                if (vi.value.V != rval_info.V) {
-                    Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ));
-                    emit_memcpy(ctx, vi.value.V, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), rval_info, copy_bytes,
-                                julia_alignment(rval_info.typ), julia_alignment(rval_info.typ), vi.isVolatile);
-                }
-            }
+        rval_info = update_julia_type(ctx, rval_info, vi.value.typ);
+        if (rval_info.typ == jl_bottom_type)
+            return;
+        if (vi.pTIndex && vi.value.V) // TODO: use lifetime-end here instead
+            ctx.builder.CreateStore(UndefValue::get(cast<AllocaInst>(vi.value.V)->getAllocatedType()), vi.value.V);
+        // Sometimes we can get into situations where the LHS and RHS
+        // are the same slot. We're not allowed to memcpy in that case
+        // due to LLVM bugs.
+        // This check should probably mostly catch the relevant situations.
+        if (vi.value.V != nullptr ? vi.value.V != rval_info.V : vi.inline_roots != nullptr) {
+            MDNode *tbaa = ctx.tbaa().tbaa_stack; // Use vi.value.tbaa ?
+            if (rval_info.TIndex)
+                emit_unionmove(ctx, vi.value.V, tbaa, rval_info, /*skip*/isboxed, vi.isVolatile);
             else {
-                emit_unionmove(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, /*skip*/isboxed, vi.isVolatile);
+                Align align(julia_alignment(rval_info.typ));
+                if (vi.inline_roots)
+                    split_value_into(ctx, rval_info, align, vi.value.V, align, jl_aliasinfo_t::fromTBAA(ctx, tbaa), vi.inline_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe), vi.isVolatile);
+                else
+                    emit_unbox_store(ctx, rval_info, vi.value.V, tbaa, align, vi.isVolatile);
             }
         }
     }
@@ -5776,7 +6196,8 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
     jl_value_t *phiType = NULL;
     if (jl_is_array(ssavalue_types)) {
         phiType = jl_array_ptr_ref(ssavalue_types, idx);
-    } else {
+    }
+    else {
         phiType = (jl_value_t*)jl_any_type;
     }
     jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0);
@@ -5786,6 +6207,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         return;
     }
     AllocaInst *dest = nullptr;
+    SmallVector<PHINode*,0> roots;
     // N.B.: For any memory space, used as a phi,
     // we need to emit space twice here. The reason for this is that
     // phi nodes may be arguments of other phi nodes, so if we don't
@@ -5796,7 +6218,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         size_t min_align, nbytes;
         dest = try_emit_union_alloca(ctx, ((jl_uniontype_t*)phiType), allunbox, min_align, nbytes);
         if (dest) {
-            Instruction *phi = dest->clone();
+            AllocaInst *phi = cast<AllocaInst>(dest->clone());
             phi->insertAfter(dest);
             PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi");
             Tindex_phi->insertInto(BB, InsertPt);
@@ -5805,14 +6227,14 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
             Value *isboxed = ctx.builder.CreateICmpNE(
                     ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
                     ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
-            ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, dest->getAlign(), nbytes, false);
+            ctx.builder.CreateMemCpy(phi, Align(min_align), dest, dest->getAlign(), nbytes, false);
             ctx.builder.CreateLifetimeEnd(dest);
             Value *ptr = ctx.builder.CreateSelect(isboxed,
                 decay_derived(ctx, ptr_phi),
                 decay_derived(ctx, phi));
             jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, best_tbaa(ctx.tbaa(), phiType));
             val.Vboxed = ptr_phi;
-            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, r));
+            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, roots, r));
             ctx.SAvalues[idx] = val;
             ctx.ssavalue_assigned[idx] = true;
             return;
@@ -5821,7 +6243,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
             PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi");
             Tindex_phi->insertInto(BB, InsertPt);
             jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa().tbaa_stack);
-            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)NULL, r));
+            ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)nullptr, roots, r));
             ctx.SAvalues[idx] = val;
             ctx.ssavalue_assigned[idx] = true;
             return;
@@ -5840,23 +6262,38 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
     }
     jl_cgval_t slot;
     PHINode *value_phi = NULL;
-    if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) {
+    if (!isboxed && vtype->isAggregateType()) {
         // the value will be moved into dest in the predecessor critical block.
         // here it's moved into phi in the successor (from dest)
-        dest = emit_static_alloca(ctx, vtype);
-        Value *phi = emit_static_alloca(ctx, vtype);
-        ctx.builder.CreateMemCpy(phi, Align(julia_alignment(phiType)),
-             dest, dest->getAlign(),
-             jl_datatype_size(phiType), false);
-        ctx.builder.CreateLifetimeEnd(dest);
-        slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack);
+        auto tracked = CountTrackedPointers(vtype);
+        if (tracked.count) {
+            roots.resize(tracked.count);
+            assert(tracked.count == split_value_size((jl_datatype_t*)phiType).second);
+            for (size_t nr = 0; nr < tracked.count; nr++) {
+                auto root_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_nrows(edges), "root_phi");
+                root_phi->insertInto(BB, InsertPt);
+                roots[nr] = root_phi;
+            }
+        }
+        AllocaInst *phi = nullptr;
+        if (!tracked.all) {
+            Align align(julia_alignment(phiType));
+            unsigned nb = jl_datatype_size(phiType);
+            dest = emit_static_alloca(ctx, nb, align);
+            phi = cast<AllocaInst>(dest->clone());
+            phi->insertBefore(dest);
+            ctx.builder.CreateMemCpy(phi, align, dest, align, nb, false);
+            ctx.builder.CreateLifetimeEnd(dest);
+        }
+        slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack,
+                roots.empty() ? ArrayRef<Value*>() : ArrayRef((Value *const *)&roots.front(), roots.size()));
     }
     else {
         value_phi = PHINode::Create(vtype, jl_array_nrows(edges), "value_phi");
         value_phi->insertInto(BB, InsertPt);
         slot = mark_julia_type(ctx, value_phi, isboxed, phiType);
     }
-    ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, r));
+    ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, roots, r));
     ctx.SAvalues[idx] = slot;
     ctx.ssavalue_assigned[idx] = true;
     return;
@@ -5876,8 +6313,9 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t ssaidx_0based, jl_valu
             it = ctx.phic_slots.emplace(ssaidx_0based, jl_varinfo_t(ctx.builder.getContext())).first;
         }
         slot = emit_varinfo(ctx, it->second, jl_symbol("phic"));
-    } else {
-        slot = emit_expr(ctx, r, ssaidx_0based); // slot could be a jl_value_t (unboxed) or jl_value_t* (ispointer)
+    }
+    else {
+        slot = emit_expr(ctx, r, ssaidx_0based);
     }
     if (slot.isboxed || slot.TIndex) {
         // see if inference suggested a different type for the ssavalue than the expression
@@ -6036,11 +6474,22 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val)
                 vi.pTIndex, Align(1), true);
         }
         else if (vi.value.V && !vi.value.constant && vi.value.typ != jl_bottom_type) {
-            assert(vi.value.ispointer());
-            Type *T = cast<AllocaInst>(vi.value.V)->getAllocatedType();
-            if (CountTrackedPointers(T).count) {
-                // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
-                ctx.builder.CreateStore(Constant::getNullValue(T), vi.value.V, true);
+            assert(vi.inline_roots || vi.value.ispointer());
+            if (vi.inline_roots) {
+                // memory optimization: make gc pointers re-initialized to NULL
+                AllocaInst *ssaroots = vi.inline_roots;
+                size_t nroots = cast<ConstantInt>(ssaroots->getArraySize())->getZExtValue();
+                auto T_prjlvalue = ssaroots->getAllocatedType();
+                if (auto AT = dyn_cast<ArrayType>(T_prjlvalue)) {
+                    nroots *= AT->getNumElements();
+                    T_prjlvalue = AT->getElementType();
+                }
+                assert(T_prjlvalue == ctx.types().T_prjlvalue);
+                Value *nullval = Constant::getNullValue(T_prjlvalue);
+                auto stack_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+                for (size_t i = 0; i < nroots; i++) {
+                    stack_ai.decorateInst(ctx.builder.CreateAlignedStore(nullval, emit_ptrgep(ctx, ssaroots, i * sizeof(void*)), ssaroots->getAlign(), true));
+                }
             }
         }
     }
@@ -6179,7 +6628,8 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
     if (closure_method->source) {
         mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec);
         ci = (jl_code_instance_t*)jl_rettype_inferred_addr(mi, ctx.min_world, ctx.max_world);
-    } else {
+    }
+    else {
         mi = (jl_method_instance_t*)jl_atomic_load_relaxed(&closure_method->specializations);
         assert(jl_is_method_instance(mi));
         ci = jl_atomic_load_relaxed(&mi->cache);
@@ -6222,7 +6672,8 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
         closure_decls.specFunctionObject;
     if (GlobalValue *V = jl_Module->getNamedValue(fname)) {
         F = cast<Function>(V);
-    } else {
+    }
+    else {
         F = Function::Create(get_func_sig(ctx.builder.getContext()),
                              Function::ExternalLinkage,
                              fname, jl_Module);
@@ -6233,7 +6684,8 @@ static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_met
     Function *specF = NULL;
     if (!isspecsig) {
         specF = F;
-    } else {
+    }
+    else {
         //emission context holds context lock so can get module
         specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject);
         if (specF) {
@@ -6288,8 +6740,9 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         jl_value_t *val = expr;
         if (jl_is_quotenode(expr))
             val = jl_fieldref_noalloc(expr, 0);
-        if (jl_is_method(ctx.linfo->def.method)) // toplevel exprs are already rooted
-            val = jl_ensure_rooted(ctx, val);
+        // Toplevel exprs are rooted but because codegen assumes this is constant, it removes the write barriers for this code.
+        // This means we have to globally root the value here. (The other option would be to change how we optimize toplevel code)
+        val = jl_ensure_rooted(ctx, val);
         return mark_julia_const(ctx, val);
     }
 
@@ -6404,20 +6857,11 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
                     return ghostValue(ctx, jl_nothing_type);
                 }
                 bp = julia_binding_gv(ctx, bnd);
-                bp = julia_binding_pvalue(ctx, bp);
-            }
-            else if (jl_is_slotnumber(mn) || jl_is_argument(mn)) {
-                // XXX: eval_methoddef does not have this code branch
-                int sl = jl_slot_number(mn)-1;
-                jl_varinfo_t &vi = ctx.slots[sl];
-                bp = vi.boxroot;
-                name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl));
-            }
-            if (bp) {
-                Value *mdargs[] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp, literal_pointer_val(ctx, bnd) };
                 jl_cgval_t gf = mark_julia_type(
                         ctx,
-                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), ArrayRef<Value*>(mdargs)),
+                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), { bp,
+                            literal_pointer_val(ctx, (jl_value_t*)mod), name
+                        }),
                         true,
                         jl_function_type);
                 return gf;
@@ -6450,17 +6894,14 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             sym = jl_globalref_name(sym);
         }
         if (jl_is_symbol(sym)) {
-            jl_binding_t *bnd = NULL;
-            Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true, true);
-            if (bp) {
-                if (nargs == 2) {
-                    jl_cgval_t rhs = emit_expr(ctx, args[1]);
-                    ctx.builder.CreateCall(prepare_call(jldeclareconstval_func),
-                            { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), boxed(ctx, rhs) });
-                } else {
-                    ctx.builder.CreateCall(prepare_call(jldeclareconst_func),
-                            { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym) });
-                }
+            jl_binding_t *bnd = jl_get_module_binding(mod, sym, 1);
+            if (nargs == 2) {
+                jl_cgval_t rhs = emit_expr(ctx, args[1]);
+                ctx.builder.CreateCall(prepare_call(jldeclareconstval_func),
+                        { julia_binding_gv(ctx, bnd), literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), boxed(ctx, rhs) });
+            } else {
+                ctx.builder.CreateCall(prepare_call(jldeclareconstval_func),
+                        { julia_binding_gv(ctx, bnd), literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym), ConstantPointerNull::get(cast<PointerType>(ctx.types().T_prjlvalue)) });
             }
         }
     }
@@ -6536,6 +6977,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
             ((jl_method_t*)source.constant)->nargs > 0 &&
             jl_is_valid_oc_argtype((jl_tupletype_t*)argt.constant, (jl_method_t*)source.constant);
 
+        if (!can_optimize && trim_may_error(ctx.params->trim)) {
+            // if we know the return type, we can assume the result is of that type
+            errs() << "ERROR: Dynamic call to OpaqueClosure method\n";
+            errs() << "In " << ctx.builder.getCurrentDebugLocation()->getFilename() << ":" << ctx.builder.getCurrentDebugLocation()->getLine() << "\n";
+            print_stacktrace(ctx, ctx.params->trim);
+        }
+
         if (can_optimize) {
             jl_value_t *closure_t = NULL;
             jl_value_t *env_t = NULL;
@@ -6703,34 +7151,26 @@ static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0, bool or_new=fal
 
 static Value *get_current_task(jl_codectx_t &ctx)
 {
-    return get_current_task_from_pgcstack(ctx.builder, ctx.types().T_size, ctx.pgcstack);
+    return get_current_task_from_pgcstack(ctx.builder, ctx.pgcstack);
 }
 
 // Get PTLS through current task.
 static Value *get_current_ptls(jl_codectx_t &ctx)
 {
-    return get_current_ptls_from_task(ctx.builder, ctx.types().T_size, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
+    return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
 }
 
 // Get the address of the world age of the current task
 static Value *get_tls_world_age_field(jl_codectx_t &ctx)
 {
     Value *ct = get_current_task(ctx);
-    return ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_size,
-            ct,
-            ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, world_age) / ctx.types().sizeof_ptr),
-            "world_age");
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, world_age), "world_age");
 }
 
 static Value *get_scope_field(jl_codectx_t &ctx)
 {
     Value *ct = get_current_task(ctx);
-    return ctx.builder.CreateInBoundsGEP(
-            ctx.types().T_prjlvalue,
-            ct,
-            ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, scope) / ctx.types().sizeof_ptr),
-            "current_scope");
+    return emit_ptrgep(ctx, ct, offsetof(jl_task_t, scope), "current_scope");
 }
 
 static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptrName, Module *M, jl_codegen_params_t &params)
@@ -6743,6 +7183,11 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr
             GlobalVariable::InternalLinkage,
             name, M);
     jl_init_function(f, params.TargetTriple);
+    if (trim_may_error(params.params->trim)) {
+        arraylist_push(&new_invokes, codeinst->def); // Try t compile this invoke
+        // TODO: Debuginfo!
+        push_frames(ctx, ctx.linfo, codeinst->def, 1);
+    }
     jl_name_jlfunc_args(params, f);
     //f->setAlwaysInline();
     ctx.f = f; // for jl_Module
@@ -6768,14 +7213,6 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, StringRef theFptr
     return f;
 }
 
-static Type *get_returnroots_type(jl_codectx_t &ctx, unsigned rootcount) {
-    return ArrayType::get(ctx.types().T_prjlvalue, rootcount);
-}
-
-static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) {
-    return ArrayType::get(getInt8Ty(C), unionbytes);
-}
-
 static void emit_cfunc_invalidate(
         Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
         jl_value_t *calltype, jl_value_t *rettype, bool is_for_opaque_closure,
@@ -6803,14 +7240,17 @@ static void emit_cfunc_invalidate(
         ++AI; // gcstack_arg
     }
     for (size_t i = 0; i < nargs; i++) {
+        // n.b. calltype is required to be a datatype by construction for specsig
         jl_value_t *jt = jl_nth_slot_type(calltype, i);
-        // n.b. specTypes is required to be a datatype by construction for specsig
-        bool isboxed = false;
-        Type *et;
         if (i == 0 && is_for_opaque_closure) {
-            et = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
+            Value *arg_v = &*AI;
+            ++AI;
+            myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const);
+            continue;
         }
-        else if (deserves_argbox(jt)) {
+        bool isboxed = false;
+        Type *et;
+        if (deserves_argbox(jt)) {
             et = ctx.types().T_prjlvalue;
             isboxed = true;
         }
@@ -6827,15 +7267,19 @@ static void emit_cfunc_invalidate(
         else {
             Value *arg_v = &*AI;
             ++AI;
-            Type *at = arg_v->getType();
-            if ((i == 0 && is_for_opaque_closure) || (!isboxed && et->isAggregateType())) {
-                myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const);
+            if (!isboxed && et->isAggregateType()) {
+                auto tracked = CountTrackedPointers(et);
+                SmallVector<Value*,0> roots;
+                if (tracked.count && !tracked.all) {
+                    roots = load_gc_roots(ctx, &*AI, tracked.count);
+                    ++AI;
+                }
+                myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa().tbaa_const, roots);
             }
             else {
-                assert(at == et);
+                assert(arg_v->getType() == et);
                 myargs[i] = mark_julia_type(ctx, arg_v, isboxed, jt);
             }
-            (void)at;
         }
     }
     assert(AI == gf_thunk->arg_end());
@@ -6843,6 +7287,7 @@ static void emit_cfunc_invalidate(
     jl_cgval_t gf_retbox = mark_julia_type(ctx, gf_ret, true, jl_any_type);
     if (cc != jl_returninfo_t::Boxed) {
         emit_typecheck(ctx, gf_retbox, rettype, "cfunction");
+        gf_retbox = update_julia_type(ctx, gf_retbox, rettype);
     }
 
     switch (cc) {
@@ -6860,16 +7305,15 @@ static void emit_cfunc_invalidate(
         break;
     }
     case jl_returninfo_t::SRet: {
+        Value *sret = &*gf_thunk->arg_begin();
+        Align align(julia_alignment(rettype));
         if (return_roots) {
-            Value *root1 = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]*
-            #if JL_LLVM_VERSION < 170000
-            assert(cast<PointerType>(root1->getType())->isOpaqueOrPointeeTypeMatches(get_returnroots_type(ctx, return_roots)));
-            #endif
-            root1 = ctx.builder.CreateConstInBoundsGEP2_32(get_returnroots_type(ctx, return_roots), root1, 0, 0);
-            ctx.builder.CreateStore(gf_ret, root1);
-        }
-        emit_memcpy(ctx, &*gf_thunk->arg_begin(), jl_aliasinfo_t::fromTBAA(ctx, nullptr), gf_ret,
-                    jl_aliasinfo_t::fromTBAA(ctx, nullptr), jl_datatype_size(rettype), julia_alignment(rettype), julia_alignment(rettype));
+            Value *roots = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]*
+            split_value_into(ctx, gf_retbox, align, sret, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe));
+        }
+        else {
+            emit_unbox_store(ctx, gf_retbox, sret, ctx.tbaa().tbaa_stack, align);
+        }
         ctx.builder.CreateRetVoid();
         break;
     }
@@ -7043,10 +7487,7 @@ static Function* gen_cfun_wrapper(
     if (calltype) {
         LoadInst *lam_max = ctx.builder.CreateAlignedLoad(
                 ctx.types().T_size,
-                ctx.builder.CreateConstInBoundsGEP1_32(
-                    ctx.types().T_size,
-                    literal_pointer_val(ctx, (jl_value_t*)codeinst),
-                    offsetof(jl_code_instance_t, max_world) / ctx.types().sizeof_ptr),
+                emit_ptrgep(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), offsetof(jl_code_instance_t, max_world)),
                 ctx.types().alignof_ptr);
         age_ok = ctx.builder.CreateICmpUGE(lam_max, world_v);
     }
@@ -7127,7 +7568,7 @@ static Function* gen_cfun_wrapper(
                     *closure_types = jl_alloc_vec_any(0);
                 jl_array_ptr_1d_push(*closure_types, jargty);
                 Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                        ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)),
+                        emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr),
                         Align(sizeof(void*)));
                 BasicBlock *boxedBB = BasicBlock::Create(ctx.builder.getContext(), "isboxed", cw);
                 BasicBlock *loadBB = BasicBlock::Create(ctx.builder.getContext(), "need-load", cw);
@@ -7193,7 +7634,7 @@ static Function* gen_cfun_wrapper(
                         *closure_types = jl_alloc_vec_any(0);
                     jl_array_ptr_1d_push(*closure_types, jargty);
                     Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                            ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_nrows(*closure_types)),
+                            emit_ptrgep(ctx, nestPtr, jl_array_nrows(*closure_types) * ctx.types().sizeof_ptr),
                             Align(sizeof(void*)));
                     Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty);
                     inputarg = mark_julia_type(ctx, strct, true, jargty_proper);
@@ -7262,77 +7703,9 @@ static Function* gen_cfun_wrapper(
         bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
         assert(calltype == 3);
         // emit a specsig call
-        bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
         StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, invoke, codeinst);
+        bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
         jl_returninfo_t returninfo = get_specsig_function(ctx, M, NULL, protoname, lam->specTypes, astrt, is_opaque_closure, gcstack_arg);
-        FunctionType *cft = returninfo.decl.getFunctionType();
-        jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
-
-        // TODO: Can use use emit_call_specfun_other here?
-        SmallVector<Value*, 0> args;
-        Value *result = nullptr;
-        if (jlfunc_sret || returninfo.cc == jl_returninfo_t::Union) {
-            // fuse the two sret together, or emit an alloca to hold it
-            if (sig.sret && jlfunc_sret) {
-                result = emit_bitcast(ctx, sretPtr, cft->getParamType(0));
-            }
-            else {
-                if (jlfunc_sret) {
-                    result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType());
-                    setName(ctx.emission_context, result, "sret");
-                    #if JL_LLVM_VERSION < 170000
-                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-                    #endif
-                } else {
-                    result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes));
-                    setName(ctx.emission_context, result, "result_union");
-                    #if JL_LLVM_VERSION < 170000
-                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
-                    #endif
-                }
-            }
-            args.push_back(result);
-        }
-        if (returninfo.return_roots) {
-            AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots));
-            setName(ctx.emission_context, return_roots, "return_roots");
-            args.push_back(return_roots);
-        }
-        if (gcstack_arg)
-            args.push_back(ctx.pgcstack);
-        for (size_t i = 0; i < nargs + 1; i++) {
-            // figure out how to repack the arguments
-            jl_cgval_t &inputarg = inputargs[i];
-            Value *arg;
-            jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type :
-                jl_nth_slot_type(lam->specTypes, i);
-            // n.b. specTypes is required to be a datatype by construction for specsig
-            bool isboxed = deserves_argbox(spect);
-            Type *T = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect);
-            if (is_uniquerep_Type(spect)) {
-                continue;
-            }
-            else if (isboxed) {
-                arg = boxed(ctx, inputarg);
-            }
-            else if (type_is_ghost(T)) {
-                continue; // ghost types are skipped by the specsig method signature
-            }
-            else if (T->isAggregateType()) {
-                // aggregate types are passed by pointer
-                inputarg = value_to_pointer(ctx, inputarg);
-                arg = decay_derived(ctx, data_pointer(ctx, inputarg));
-            }
-            else {
-                arg = emit_unbox(ctx, T, inputarg, spect);
-                assert(!isa<UndefValue>(arg));
-            }
-
-            // add to argument list
-            args.push_back(arg);
-        }
-        Value *theFptr = returninfo.decl.getCallee();
-        assert(theFptr);
         if (age_ok) {
             funcName += "_gfthunk";
             Function *gf_thunk = Function::Create(returninfo.decl.getFunctionType(),
@@ -7344,49 +7717,17 @@ static Function* gen_cfun_wrapper(
             // but which has the signature of a specsig
             emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, is_opaque_closure, nargs + 1, ctx.emission_context,
                 min_world, max_world);
-            theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk);
+            returninfo.decl = FunctionCallee(returninfo.decl.getFunctionType(), ctx.builder.CreateSelect(age_ok, returninfo.decl.getCallee(), gf_thunk));
         }
-
-        #if JL_LLVM_VERSION < 170000
-        assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl.getFunctionType()));
-        #endif
-        CallInst *call = ctx.builder.CreateCall(
-            returninfo.decl.getFunctionType(),
-            theFptr, ArrayRef<Value*>(args));
-        call->setAttributes(returninfo.attrs);
-        if (gcstack_arg)
-            call->setCallingConv(CallingConv::Swift);
-
-        switch (returninfo.cc) {
-            case jl_returninfo_t::Boxed:
-                retval = mark_julia_type(ctx, call, true, astrt);
-                break;
-            case jl_returninfo_t::Register:
-                retval = mark_julia_type(ctx, call, false, astrt);
-                break;
-            case jl_returninfo_t::SRet:
-                retval = mark_julia_slot(result, astrt, NULL, ctx.tbaa().tbaa_stack);
-                break;
-            case jl_returninfo_t::Union: {
-                Value *box = ctx.builder.CreateExtractValue(call, 0);
-                Value *tindex = ctx.builder.CreateExtractValue(call, 1);
-                Value *derived = ctx.builder.CreateSelect(
-                    ctx.builder.CreateICmpEQ(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), UNION_BOX_MARKER)),
-                            ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
-                    decay_derived(ctx, result),
-                    decay_derived(ctx, box));
-                retval = mark_julia_slot(derived,
-                                         astrt,
-                                         tindex,
-                                         ctx.tbaa().tbaa_stack);
-                assert(box->getType() == ctx.types().T_prjlvalue);
-                retval.Vboxed = box;
-                break;
-            }
-            case jl_returninfo_t::Ghosts:
-                retval = mark_julia_slot(NULL, astrt, call, ctx.tbaa().tbaa_stack);
-                break;
+        retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, codeinst->rettype, returninfo, nullptr, inputargs, nargs + 1);
+        jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
+        if (jlfunc_sret && sig.sret) {
+            // fuse the two sret together
+            assert(retval.ispointer());
+            AllocaInst *result = cast<AllocaInst>(retval.V);
+            retval.V = sretPtr;
+            result->replaceAllUsesWith(sretPtr);
+            result->eraseFromParent();
         }
     }
 
@@ -7685,7 +8026,7 @@ const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysi
 }
 
 // generate a julia-callable function that calls f (AKA lam)
-static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName,
+static void gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, jl_returninfo_t &f, unsigned nargs, int retarg, StringRef funcName,
         Module *M, jl_codegen_params_t &params)
 {
     ++GeneratedInvokeWrappers;
@@ -7713,131 +8054,48 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     ctx.builder.SetCurrentDebugLocation(noDbg);
     allocate_gc_frame(ctx, b0);
 
-    // TODO: replace this with emit_call_specfun_other?
-    FunctionType *ftype = const_cast<llvm::FunctionCallee&>(f.decl).getFunctionType();
-    size_t nfargs = ftype->getNumParams();
-    SmallVector<Value *, 0> args(nfargs);
-    unsigned idx = 0;
-    AllocaInst *result = NULL;
-    switch (f.cc) {
-    case jl_returninfo_t::Boxed:
-    case jl_returninfo_t::Register:
-    case jl_returninfo_t::Ghosts:
-        break;
-    case jl_returninfo_t::SRet:
-        #if JL_LLVM_VERSION < 170000
-        assert(cast<PointerType>(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType()));
-        #endif
-        result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.attrs, 1, Attribute::StructRet).getValueAsType());
-        setName(ctx.emission_context, result, "sret");
-        args[idx] = result;
-        idx++;
-        break;
-    case jl_returninfo_t::Union:
-        result = ctx.builder.CreateAlloca(ArrayType::get(getInt8Ty(ctx.builder.getContext()), f.union_bytes));
-        if (f.union_align > 1)
-            result->setAlignment(Align(f.union_align));
-        args[idx] = result;
-        idx++;
-        setName(ctx.emission_context, result, "result_union");
-        break;
-    }
-    if (f.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots));
-        setName(ctx.emission_context, return_roots, "return_roots");
-        args[idx] = return_roots;
-        idx++;
-    }
-    bool gcstack_arg = JL_FEAT_TEST(ctx, gcstack_arg);
-    if (gcstack_arg) {
-        args[idx] = ctx.pgcstack;
-        idx++;
-    }
+    SmallVector<jl_cgval_t, 0> argv(nargs);
     bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
-    for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) {
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+    for (size_t i = 0; i < nargs; ++i) {
         jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type :
             jl_nth_slot_type(lam->specTypes, i);
-        // n.b. specTypes is required to be a datatype by construction for specsig
-        bool isboxed = deserves_argbox(ty);
-        Type *lty = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty);
-        if (type_is_ghost(lty) || is_uniquerep_Type(ty))
-            continue;
         Value *theArg;
         if (i == 0) {
-            // This function adapts from generic jlcall to OC specsig. Generic jlcall pointers
-            // come in as ::Tracked, but specsig expected ::Derived.
-            if (is_opaque_closure)
-                theArg = decay_derived(ctx, funcArg);
-            else
-                theArg = funcArg;
+            theArg = funcArg;
         }
         else {
-            Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+            Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr);
             theArg = ai.decorateInst(maybe_mark_load_dereferenceable(
                     ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
                     false,
                     ty));
         }
-        if (!isboxed) {
-            theArg = decay_derived(ctx, theArg);
-            if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
-                theArg = ctx.builder.CreateAlignedLoad(lty, theArg, Align(julia_alignment(ty)));
-        }
-        assert(!isa<UndefValue>(theArg));
-        args[idx] = theArg;
-        idx++;
+        argv[i] = mark_julia_type(ctx, theArg, true, ty);
     }
-    CallInst *call = ctx.builder.CreateCall(f.decl, args);
-    call->setAttributes(f.attrs);
-    if (gcstack_arg)
-        call->setCallingConv(CallingConv::Swift);
-    jl_cgval_t retval;
+    jl_cgval_t retval = emit_call_specfun_other(ctx, is_opaque_closure, lam->specTypes, jlretty, f, nullptr, argv, nargs);
     if (retarg != -1) {
         Value *theArg;
         if (retarg == 0)
             theArg = funcArg;
         else
             theArg = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
-                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, retarg - 1),
+                    emit_ptrgep(ctx, argArray, (retarg - 1) * ctx.types().sizeof_ptr),
                     Align(sizeof(void*)));
         retval = mark_julia_type(ctx, theArg, true, jl_any_type);
     }
-    else {
-        switch (f.cc) {
-        case jl_returninfo_t::Boxed:
-            retval = mark_julia_type(ctx, call, true, jlretty);
-            break;
-        case jl_returninfo_t::Register:
-            retval = mark_julia_type(ctx, call, false, jlretty);
-            break;
-        case jl_returninfo_t::SRet:
-            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa().tbaa_stack);
-            break;
-        case jl_returninfo_t::Union:
-            // result is technically not right here, but `boxed` will only look at it
-            // for the unboxed values, so it's ok.
-            retval = mark_julia_slot(result,
-                                     jlretty,
-                                     ctx.builder.CreateExtractValue(call, 1),
-                                     ctx.tbaa().tbaa_stack);
-            retval.Vboxed = ctx.builder.CreateExtractValue(call, 0);
-            assert(retval.Vboxed->getType() == ctx.types().T_prjlvalue);
-            break;
-        case jl_returninfo_t::Ghosts:
-            retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa().tbaa_stack);
-            break;
-        }
-    }
-    ctx.builder.CreateRet(boxed(ctx, retval));
-    return w;
+    if (retval.typ == jl_bottom_type)
+        CreateTrap(ctx.builder, false);
+    else
+        ctx.builder.CreateRet(boxed(ctx, retval));
 }
 
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments, size_t *arg_offset)
+static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg,
+        ArrayRef<const char*> ArgNames, unsigned nreq)
 {
     jl_returninfo_t props = {};
-    SmallVector<Type*, 8> fsig;
-    SmallVector<std::string, 4> argnames;
+    SmallVector<Type*,8> fsig;
+    SmallVector<std::string,4> argnames;
     Type *rt = NULL;
     Type *srt = NULL;
     if (jlrettype == (jl_value_t*)jl_bottom_type) {
@@ -7874,9 +8132,13 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         if (rt != getVoidTy(ctx.builder.getContext()) && deserves_sret(jlrettype, rt)) {
             auto tracked = CountTrackedPointers(rt, true);
             assert(!tracked.derived);
-            if (tracked.count && !tracked.all)
+            if (tracked.count && !tracked.all) {
                 props.return_roots = tracked.count;
+                assert(props.return_roots == ((jl_datatype_t*)jlrettype)->layout->npointers);
+            }
             props.cc = jl_returninfo_t::SRet;
+            props.union_bytes = jl_datatype_size(jlrettype);
+            props.union_align = props.union_minalign = julia_alignment(jlrettype);
             // sret is always passed from alloca
             assert(M);
             fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace()));
@@ -7918,7 +8180,7 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         param.addAttribute(Attribute::NoCapture);
         param.addAttribute(Attribute::NoUndef);
         attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
-        fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0));
+        fsig.push_back(ctx.types().T_ptr);
         argnames.push_back("return_roots");
     }
 
@@ -7931,33 +8193,25 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         argnames.push_back("pgcstack_arg");
     }
 
-    if (arg_offset)
-        *arg_offset = fsig.size();
     size_t nparams = jl_nparams(sig);
-    if (used_arguments)
-        used_arguments->resize(nparams);
-
     for (size_t i = 0; i < nparams; i++) {
         jl_value_t *jt = jl_tparam(sig, i);
         bool isboxed = false;
-        Type *ty = NULL;
-        if (i == 0 && is_opaque_closure) {
-            ty = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
-            isboxed = true; // true-ish anyway - we might not have the type tag
-        }
-        else {
+        Type *et = nullptr;
+        if (i != 0 || !is_opaque_closure) { // special token for OC argument
             if (is_uniquerep_Type(jt))
                 continue;
             isboxed = deserves_argbox(jt);
-            ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
+            et = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
+            if (type_is_ghost(et))
+                continue;
         }
-        if (type_is_ghost(ty))
-            continue;
         AttrBuilder param(ctx.builder.getContext());
-        if (ty->isAggregateType()) { // aggregate types are passed by pointer
+        Type *ty = et;
+        if (et == nullptr || et->isAggregateType()) { // aggregate types are passed by pointer
             param.addAttribute(Attribute::NoCapture);
             param.addAttribute(Attribute::ReadOnly);
-            ty = PointerType::get(ty, AddressSpace::Derived);
+            ty = ctx.builder.getPtrTy(AddressSpace::Derived);
         }
         else if (isboxed && jl_is_immutable_datatype(jt)) {
             param.addAttribute(Attribute::ReadOnly);
@@ -7969,8 +8223,26 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
         }
         attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
         fsig.push_back(ty);
-        if (used_arguments)
-            used_arguments->set(i);
+        size_t argno = i < nreq ? i : nreq;
+        std::string genname;
+        if (!ArgNames.empty()) {
+            genname = ArgNames[argno];
+            if (genname.empty())
+                genname = (StringRef("#") + Twine(argno + 1)).str();
+            if (i >= nreq)
+                genname += (StringRef("[") + Twine(i - nreq + 1) + StringRef("]")).str();
+            const char *arg_typename = jl_is_datatype(jt) ? jl_symbol_name(((jl_datatype_t*)jt)->name->name) : "<unknown type>";
+            argnames.push_back((genname + StringRef("::") + arg_typename).str());
+        }
+        if (et && et->isAggregateType()) {
+            auto tracked = CountTrackedPointers(et);
+            if (tracked.count && !tracked.all) {
+                attrs.push_back(AttributeSet::get(ctx.builder.getContext(), param));
+                fsig.push_back(ctx.builder.getPtrTy(M->getDataLayout().getAllocaAddrSpace()));
+                if (!genname.empty())
+                    argnames.push_back((Twine(".roots.") + genname).str());
+            }
+        }
     }
 
     AttributeSet FnAttrs;
@@ -8018,12 +8290,6 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value
     return props;
 }
 
-static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, Type *ShadowT, unsigned count)
-{
-    unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ShadowT, ctx.builder); //This comes from Late-GC-Lowering??
-    assert(emitted == count); (void)emitted; (void)count;
-}
-
 static DISubroutineType *
 get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl_value_t *sig, DIBuilder &dbuilder)
 {
@@ -8139,6 +8405,7 @@ static jl_llvm_functions_t
     if (lam && jl_is_method(lam->def.method)) {
         toplineno = lam->def.method->line;
         ctx.file = jl_symbol_name(lam->def.method->file);
+        ctx.line = lam->def.method->line;
     }
     else if ((jl_value_t*)src->debuginfo != jl_nothing) {
         // look for the file and line info of the original start of this block, as reported by lowering
@@ -8147,6 +8414,7 @@ static jl_llvm_functions_t
             debuginfo = debuginfo->linetable;
         ctx.file = jl_debuginfo_file(debuginfo);
         struct jl_codeloc_t lineidx = jl_uncompress1_codeloc(debuginfo->codelocs, 0);
+        ctx.line = lineidx.line;
         toplineno = std::max((int32_t)0, lineidx.line);
     }
     if (ctx.file.empty())
@@ -8236,49 +8504,26 @@ static jl_llvm_functions_t
     Function *f = NULL;
     bool has_sret = false;
     if (specsig) { // assumes !va and !needsparams
-        BitVector used_args;
-        size_t args_begin;
-        returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes,
-                                          jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg), &used_args, &args_begin);
-        f = cast<Function>(returninfo.decl.getCallee());
-        has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
-        jl_init_function(f, ctx.emission_context.TargetTriple);
+        SmallVector<const char*,0> ArgNames(0);
         if (ctx.emission_context.debug_level >= 2) {
-            auto arg_typename = [&](size_t i) JL_NOTSAFEPOINT {
-                auto tp = jl_tparam(lam->specTypes, i);
-                return jl_is_datatype(tp) ? jl_symbol_name(((jl_datatype_t*)tp)->name->name) : "<unknown type>";
-            };
-            size_t nreal = 0;
-            for (size_t i = 0; i < std::min(nreq, static_cast<size_t>(used_args.size())); i++) {
+            ArgNames.resize(ctx.nargs, "");
+            for (int i = 0; i < ctx.nargs; i++) {
                 jl_sym_t *argname = slot_symbol(ctx, i);
                 if (argname == jl_unused_sym)
                     continue;
-                if (used_args.test(i)) {
-                    auto &arg = *f->getArg(args_begin++);
-                    nreal++;
-                    auto name = jl_symbol_name(argname);
-                    if (!name[0]) {
-                        arg.setName(StringRef("#") + Twine(nreal) + StringRef("::") + arg_typename(i));
-                    } else {
-                        arg.setName(name + StringRef("::") + arg_typename(i));
-                    }
-                }
-            }
-            if (va && ctx.vaSlot != -1) {
-                size_t vidx = 0;
-                for (size_t i = nreq; i < used_args.size(); i++) {
-                    if (used_args.test(i)) {
-                        auto &arg = *f->getArg(args_begin++);
-                        auto type = arg_typename(i);
-                        const char *name = jl_symbol_name(slot_symbol(ctx, ctx.vaSlot));
-                        if (!name[0])
-                            name = "...";
-                        vidx++;
-                        arg.setName(name + StringRef("[") + Twine(vidx) + StringRef("]::") + type);
-                    }
-                }
+                const char *name = jl_symbol_name(argname);
+                if (name[0] == '\0' && ctx.vaSlot == i)
+                    ArgNames[i] = "...";
+                else
+                    ArgNames[i] = name;
             }
         }
+        returninfo = get_specsig_function(ctx, M, NULL, declarations.specFunctionObject, lam->specTypes,
+                                          jlrettype, ctx.is_opaque_closure, JL_FEAT_TEST(ctx,gcstack_arg),
+                                          ArgNames, nreq);
+        f = cast<Function>(returninfo.decl.getCallee());
+        has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
+        jl_init_function(f, ctx.emission_context.TargetTriple);
 
         // common pattern: see if all return statements are an argument in that
         // case the apply-generic call can re-use the original box for the return
@@ -8307,7 +8552,8 @@ static jl_llvm_functions_t
         std::string wrapName;
         raw_string_ostream(wrapName) << "jfptr_" << ctx.name << "_" << jl_atomic_fetch_add_relaxed(&globalUniqueGeneratedNames, 1);
         declarations.functionObject = wrapName;
-        (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context);
+        size_t nparams = jl_nparams(lam->specTypes);
+        gen_invoke_wrapper(lam, jlrettype, returninfo, nparams, retarg, declarations.functionObject, M, ctx.emission_context);
         // TODO: add attributes: maybe_mark_argument_dereferenceable(Arg, argType)
         // TODO: add attributes: dereferenceable<sizeof(void*) * nreq>
         // TODO: (if needsparams) add attributes: dereferenceable<sizeof(void*) * length(sp)>, readonly, nocapture
@@ -8478,14 +8724,16 @@ static jl_llvm_functions_t
     allocate_gc_frame(ctx, b0);
     Value *last_age = NULL;
     auto world_age_field = get_tls_world_age_field(ctx);
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
-    last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad(
-               ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
-    ctx.world_age_at_entry = last_age; // Load world age for use in get_tls_world_age
+    { // scope
+        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe);
+        last_age = ai.decorateInst(ctx.builder.CreateAlignedLoad(
+                   ctx.types().T_size, world_age_field, ctx.types().alignof_ptr));
+        ctx.world_age_at_entry = last_age; // Load world age for use in get_tls_world_age
+    }
 
     // step 7. allocate local variables slots
     // must be in the first basic block for the llvm mem2reg pass to work
-    auto allocate_local = [&ctx, &dbuilder, &debugcache, topdebugloc, va, debug_enabled, M](jl_varinfo_t &varinfo, jl_sym_t *s, int i) {
+    auto allocate_local = [&ctx, &dbuilder, &debugcache, topdebugloc, va, debug_enabled](jl_varinfo_t &varinfo, jl_sym_t *s, int i) {
         jl_value_t *jt = varinfo.value.typ;
         assert(!varinfo.boxroot); // variables shouldn't have memory locs already
         if (varinfo.value.constant) {
@@ -8505,13 +8753,13 @@ static jl_llvm_functions_t
             if (lv) {
                 lv->setName(jl_symbol_name(s));
                 varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack);
-                varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
+                varinfo.pTIndex = emit_static_alloca(ctx, 1, Align(1));
                 setName(ctx.emission_context, varinfo.pTIndex, "tindex");
                 // TODO: attach debug metadata to this variable
             }
             else if (allunbox) {
                 // all ghost values just need a selector allocated
-                AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
+                AllocaInst *lv = emit_static_alloca(ctx, 1, Align(1));
                 lv->setName(jl_symbol_name(s));
                 varinfo.pTIndex = lv;
                 varinfo.value.tbaa = NULL;
@@ -8524,30 +8772,25 @@ static jl_llvm_functions_t
                 return;
         }
         else if (deserves_stack(jt)) {
-            bool isboxed;
-            Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed);
-            assert(!isboxed);
-            assert(!type_is_ghost(vtype) && "constants should already be handled");
-            Value *lv = new AllocaInst(vtype, M->getDataLayout().getAllocaAddrSpace(), nullptr, Align(jl_datatype_align(jt)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca);
-            if (CountTrackedPointers(vtype).count) {
-                StoreInst *SI = new StoreInst(Constant::getNullValue(vtype), lv, false, Align(sizeof(void*)));
-                SI->insertAfter(ctx.topalloca);
-            }
-            varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack);
+            auto sizes = split_value_size((jl_datatype_t*)jt);
+            AllocaInst *bits = sizes.first > 0 ? emit_static_alloca(ctx, sizes.first, Align(julia_alignment(jt))) : nullptr;
+            AllocaInst *roots = sizes.second > 0 ? emit_static_roots(ctx, sizes.second) : nullptr;
+            if (bits) bits->setName(jl_symbol_name(s));
+            if (roots) roots->setName(StringRef(".roots.") + jl_symbol_name(s));
+            varinfo.value = mark_julia_slot(bits, jt, NULL, ctx.tbaa().tbaa_stack, None);
+            varinfo.inline_roots = roots;
             alloc_def_flag(ctx, varinfo);
             if (debug_enabled && varinfo.dinfo) {
                 assert((Metadata*)varinfo.dinfo->getType() != debugcache.jl_pvalue_dillvmt);
-                dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(),
+                dbuilder.insertDeclare(bits ? bits : roots, varinfo.dinfo, dbuilder.createExpression(),
                                        topdebugloc,
                                        ctx.builder.GetInsertBlock());
             }
             return;
         }
         // otherwise give it a boxroot in this function
-        AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, M->getDataLayout().getAllocaAddrSpace(),
-            nullptr, Align(sizeof(jl_value_t*)), jl_symbol_name(s), /*InsertBefore*/ctx.topalloca);
-        StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*)));
-        SI->insertAfter(ctx.topalloca);
+        AllocaInst *av = emit_static_roots(ctx, 1);
+        av->setName(jl_symbol_name(s));
         varinfo.boxroot = av;
         if (debug_enabled && varinfo.dinfo) {
             SmallVector<uint64_t, 1> addr;
@@ -8634,12 +8877,18 @@ static jl_llvm_functions_t
         ++AI;
         AttrBuilder param(ctx.builder.getContext(), f->getAttributes().getParamAttrs(Arg->getArgNo()));
         jl_cgval_t theArg;
-        if (llvmArgType->isAggregateType()) {
+        if (!isboxed && llvmArgType->isAggregateType()) {
             maybe_mark_argument_dereferenceable(param, argType);
-            theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const); // this argument is by-pointer
+            SmallVector<Value*,0> roots;
+            auto tracked = CountTrackedPointers(llvmArgType);
+            if (tracked.count && !tracked.all) {
+                roots = load_gc_roots(ctx, &*AI, tracked.count);
+                ++AI;
+            }
+            theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa().tbaa_const, roots); // this argument is by-pointer
         }
         else {
-            if (isboxed) // e.g. is-pointer
+            if (isboxed)
                 maybe_mark_argument_dereferenceable(param, argType);
             theArg = mark_julia_type(ctx, Arg, isboxed, argType);
             if (theArg.tbaa == ctx.tbaa().tbaa_immut)
@@ -8696,100 +8945,93 @@ static jl_llvm_functions_t
         bool isboxed = deserves_argbox(argType);
         Type *llvmArgType = NULL;
         if (i == 0 && ctx.is_opaque_closure) {
-            isboxed = true;
-            llvmArgType = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
+            isboxed = false;
+            llvmArgType = ctx.builder.getPtrTy(AddressSpace::Derived);
             argType = (jl_value_t*)jl_any_type;
         }
         else {
             llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
         }
-        if (s == jl_unused_sym) {
-            if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
-                ++AI;
-            continue;
-        }
         jl_varinfo_t &vi = ctx.slots[i];
-        jl_cgval_t theArg;
         if (s == jl_unused_sym || vi.value.constant) {
             assert(vi.boxroot == NULL);
-            if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
+            if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType)) {
                 ++AI;
+                auto tracked = CountTrackedPointers(llvmArgType);
+                if (tracked.count && !tracked.all)
+                    ++AI;
+            }
+            continue;
+        }
+        jl_cgval_t theArg;
+        // If this is an opaque closure, implicitly load the env and switch
+        // the world age.
+        if (i == 0 && ctx.is_opaque_closure) {
+            // Load closure world
+            Value *oc_this = decay_derived(ctx, &*AI++);
+            Value *argaddr = oc_this;
+            Value *worldaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, world));
+
+            jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
+                nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value());
+            ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure
+            emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr);
+
+            // Load closure env
+            Value *envaddr = emit_ptrgep(ctx, argaddr, offsetof(jl_opaque_closure_t, captures));
+
+            jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
+                nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
+            theArg = update_julia_type(ctx, closure_env, vi.value.typ);
+        }
+        else if (specsig) {
+            theArg = get_specsig_arg(argType, llvmArgType, isboxed);
         }
         else {
-            // If this is an opaque closure, implicitly load the env and switch
-            // the world age.
-            if (i == 0 && ctx.is_opaque_closure) {
-                // Load closure world
-                Value *oc_this = decay_derived(ctx, &*AI++);
-                Value *argaddr = oc_this;
-                Value *worldaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, world)));
-
-                jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
-                    nullptr, nullptr, false, AtomicOrdering::NotAtomic, false, ctx.types().alignof_ptr.value());
-                ctx.world_age_at_entry = closure_world.V; // The tls world in a OC is the world of the closure
-                emit_unbox_store(ctx, closure_world, world_age_field, ctx.tbaa().tbaa_gcframe, ctx.types().alignof_ptr.value());
-
-                // Load closure env
-                Value *envaddr = ctx.builder.CreateInBoundsGEP(
-                        getInt8Ty(ctx.builder.getContext()), argaddr,
-                        ConstantInt::get(ctx.types().T_size, offsetof(jl_opaque_closure_t, captures)));
-
-                jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
-                    nullptr, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
-                theArg = update_julia_type(ctx, closure_env, vi.value.typ);
-            }
-            else if (specsig) {
-                theArg = get_specsig_arg(argType, llvmArgType, isboxed);
+            if (i == 0) {
+                // first (function) arg is separate in jlcall
+                theArg = mark_julia_type(ctx, fArg, true, vi.value.typ);
             }
             else {
-                if (i == 0) {
-                    // first (function) arg is separate in jlcall
-                    theArg = mark_julia_type(ctx, fArg, true, vi.value.typ);
-                }
-                else {
-                    Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
-                    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
-                    Value *load = ai.decorateInst(maybe_mark_load_dereferenceable(
-                            ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
-                            false, vi.value.typ));
-                    theArg = mark_julia_type(ctx, load, true, vi.value.typ);
-                    if (debug_enabled && vi.dinfo && !vi.boxroot) {
-                        SmallVector<uint64_t, 8> addr;
+                Value *argPtr = emit_ptrgep(ctx, argArray, (i - 1) * ctx.types().sizeof_ptr);
+                jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const);
+                Value *load = ai.decorateInst(maybe_mark_load_dereferenceable(
+                        ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
+                        false, vi.value.typ));
+                theArg = mark_julia_type(ctx, load, true, vi.value.typ);
+                if (debug_enabled && vi.dinfo && !vi.boxroot) {
+                    SmallVector<uint64_t, 8> addr;
+                    addr.push_back(llvm::dwarf::DW_OP_deref);
+                    addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
+                    addr.push_back((i - 1) * sizeof(void*));
+                    if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt)
                         addr.push_back(llvm::dwarf::DW_OP_deref);
-                        addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
-                        addr.push_back((i - 1) * sizeof(void*));
-                        if ((Metadata*)vi.dinfo->getType() != debugcache.jl_pvalue_dillvmt)
-                            addr.push_back(llvm::dwarf::DW_OP_deref);
-                        dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr),
-                                        topdebugloc,
-                                        ctx.builder.GetInsertBlock());
-                    }
+                    dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr),
+                                    topdebugloc,
+                                    ctx.builder.GetInsertBlock());
                 }
             }
+        }
 
-
-            if (vi.boxroot == NULL) {
-                assert(vi.value.V == NULL && "unexpected variable slot created for argument");
-                // keep track of original (possibly boxed) value to avoid re-boxing or moving
-                vi.value = theArg;
-                if (debug_enabled && vi.dinfo && theArg.V) {
-                    if (theArg.ispointer()) {
-                        dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(),
-                                               topdebugloc, ctx.builder.GetInsertBlock());
-                    }
-                    else {
-                        dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(),
-                                                         topdebugloc, ctx.builder.GetInsertBlock());
-                    }
+        if (vi.boxroot == nullptr) {
+            assert(vi.value.V == nullptr && vi.inline_roots == nullptr && "unexpected variable slot created for argument");
+            // keep track of original (possibly boxed) value to avoid re-boxing or moving
+            vi.value = theArg;
+            if (debug_enabled && vi.dinfo && theArg.V) {
+                if (!theArg.inline_roots.empty() || theArg.ispointer()) {
+                    dbuilder.insertDeclare(theArg.V, vi.dinfo, dbuilder.createExpression(),
+                                            topdebugloc, ctx.builder.GetInsertBlock());
+                }
+                else {
+                    dbuilder.insertDbgValueIntrinsic(theArg.V, vi.dinfo, dbuilder.createExpression(),
+                                                        topdebugloc, ctx.builder.GetInsertBlock());
                 }
-            }
-            else {
-                Value *argp = boxed(ctx, theArg);
-                ctx.builder.CreateStore(argp, vi.boxroot);
             }
         }
+        else {
+            Value *argp = boxed(ctx, theArg);
+            ctx.builder.CreateStore(argp, vi.boxroot);
+        }
     }
     // step 9. allocate rest argument
     CallInst *restTuple = NULL;
@@ -8825,10 +9067,8 @@ static jl_llvm_functions_t
             restTuple =
                 ctx.builder.CreateCall(F,
                         { Constant::getNullValue(ctx.types().T_prjlvalue),
-                          ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray,
-                                  ConstantInt::get(ctx.types().T_size, nreq - 1)),
-                          ctx.builder.CreateSub(argCount,
-                                  ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) });
+                          emit_ptrgep(ctx, argArray, (nreq - 1) * sizeof(jl_value_t*)),
+                          ctx.builder.CreateSub(argCount, ctx.builder.getInt32(nreq - 1)) });
             restTuple->setAttributes(F->getAttributes());
             ctx.builder.CreateStore(restTuple, vi.boxroot);
         }
@@ -8846,7 +9086,7 @@ static jl_llvm_functions_t
                 !jl_is_submodule(mod, jl_core_module));
     };
     auto in_tracked_path = [] (StringRef file) { // falls within an explicitly set file or directory
-        return jl_options.tracked_path != NULL && file.startswith(jl_options.tracked_path);
+        return jl_options.tracked_path != NULL && file.starts_with(jl_options.tracked_path);
     };
     bool mod_is_user_mod = in_user_mod(ctx.module);
     bool mod_is_tracked = in_tracked_path(ctx.file);
@@ -9265,28 +9505,31 @@ static jl_llvm_functions_t
                 break;
             }
             if (sret) {
-                if (retvalinfo.ispointer()) {
-                    if (returninfo.return_roots) {
-                        Type *store_ty = julia_type_to_llvm(ctx, retvalinfo.typ);
-                        emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
-                    }
+                Align align(returninfo.union_align);
+                if (!returninfo.return_roots && !retvalinfo.inline_roots.empty()) {
+                    assert(retvalinfo.V == nullptr);
+                    assert(returninfo.cc == jl_returninfo_t::SRet);
+                    split_value_into(ctx, retvalinfo, align, nullptr, align,
+                            jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), sret, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe));
+                }
+                else if (returninfo.return_roots) {
+                    assert(returninfo.cc == jl_returninfo_t::SRet);
+                    Value *return_roots = f->arg_begin() + 1;
+                    split_value_into(ctx, retvalinfo, align, sret, align,
+                            jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), return_roots, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_gcframe));
+                }
+                else if (retvalinfo.ispointer()) {
                     if (returninfo.cc == jl_returninfo_t::SRet) {
                         assert(jl_is_concrete_type(jlrettype));
                         emit_memcpy(ctx, sret, jl_aliasinfo_t::fromTBAA(ctx, nullptr), retvalinfo,
-                                    jl_datatype_size(jlrettype), julia_alignment(jlrettype), julia_alignment(jlrettype));
+                                    jl_datatype_size(jlrettype), align, align);
                     }
                     else { // must be jl_returninfo_t::Union
                         emit_unionmove(ctx, sret, nullptr, retvalinfo, /*skip*/isboxed_union);
                     }
                 }
                 else {
-                    Type *store_ty = retvalinfo.V->getType();
-                    Value *Val = retvalinfo.V;
-                    if (returninfo.return_roots) {
-                        assert(julia_type_to_llvm(ctx, retvalinfo.typ) == store_ty);
-                        emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
-                    }
-                    ctx.builder.CreateAlignedStore(Val, sret, Align(julia_alignment(retvalinfo.typ)));
+                    ctx.builder.CreateAlignedStore(retvalinfo.V, sret, align);
                     assert(retvalinfo.TIndex == NULL && "unreachable"); // unimplemented representation
                 }
             }
@@ -9395,11 +9638,7 @@ static jl_llvm_functions_t
                     ctx.builder.CreateBr(handlr);
                 }
                 ctx.builder.SetInsertPoint(tryblk);
-                auto ehptr = ctx.builder.CreateInBoundsGEP(
-                    ctx.types().T_ptr,
-                    ct,
-                    ConstantInt::get(ctx.types().T_size, offsetof(jl_task_t, eh) / ctx.types().sizeof_ptr),
-                    "eh");
+                auto ehptr = emit_ptrgep(ctx, ct, offsetof(jl_task_t, eh));
                 ctx.builder.CreateAlignedStore(ehbuf, ehptr, ctx.types().alignof_ptr);
             }
         }
@@ -9427,8 +9666,9 @@ static jl_llvm_functions_t
         PHINode *VN;
         jl_value_t *r;
         AllocaInst *dest;
+        SmallVector<PHINode*,0> roots;
         BasicBlock *PhiBB;
-        std::tie(phi_result, PhiBB, dest, VN, r) = tup;
+        std::tie(phi_result, PhiBB, dest, VN, roots, r) = tup;
         jl_value_t *phiType = phi_result.typ;
         jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0);
         jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(r, 1);
@@ -9486,6 +9726,7 @@ static jl_llvm_functions_t
                 val = mark_julia_const(ctx, val.constant); // be over-conservative at making sure `.typ` is set concretely, not tindex
             if (!jl_is_uniontype(phiType) || !TindexN) {
                 if (VN) {
+                    assert(roots.empty() && !dest);
                     Value *V;
                     if (val.typ == (jl_value_t*)jl_bottom_type) {
                         V = undef_value_for_type(VN->getType());
@@ -9506,14 +9747,34 @@ static jl_llvm_functions_t
                     VN->addIncoming(V, ctx.builder.GetInsertBlock());
                     assert(!TindexN);
                 }
-                else if (dest && val.typ != (jl_value_t*)jl_bottom_type) {
+                else if ((dest || !roots.empty()) && val.typ != (jl_value_t*)jl_bottom_type) {
                     // must be careful to emit undef here (rather than a bitcast or
                     // load of val) if the runtime type of val isn't phiType
+                    auto tracked = split_value_size((jl_datatype_t*)phiType).second;
                     Value *isvalid = emit_isa_and_defined(ctx, val, phiType);
-                    emit_guarded_test(ctx, isvalid, nullptr, [&] {
-                        emit_unbox_store(ctx, update_julia_type(ctx, val, phiType), dest, ctx.tbaa().tbaa_stack, julia_alignment(phiType));
-                        return nullptr;
+                    assert(roots.size() == tracked && isvalid != nullptr);
+                    SmallVector<Value*,0> incomingroots(0);
+                    if (tracked)
+                        incomingroots.resize(tracked, Constant::getNullValue(ctx.types().T_prjlvalue));
+                    emit_guarded_test(ctx, isvalid, incomingroots, [&] {
+                        jl_cgval_t typedval = update_julia_type(ctx, val, phiType);
+                        SmallVector<Value*,0> mayberoots(tracked, Constant::getNullValue(ctx.types().T_prjlvalue));
+                        if (typedval.typ != jl_bottom_type) {
+                            Align align(julia_alignment(phiType));
+                            if (tracked)
+                                split_value_into(ctx, typedval, align, dest, align, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), mayberoots);
+                            else
+                                emit_unbox_store(ctx, typedval, dest, ctx.tbaa().tbaa_stack, align);
+                        }
+                        return mayberoots;
                     });
+                    for (size_t nr = 0; nr < tracked; nr++)
+                        roots[nr]->addIncoming(incomingroots[nr], ctx.builder.GetInsertBlock());
+                }
+                else if (!roots.empty()) {
+                    Value *V = Constant::getNullValue(ctx.types().T_prjlvalue);
+                    for (size_t nr = 0; nr < roots.size(); nr++)
+                        roots[nr]->addIncoming(V, ctx.builder.GetInsertBlock());
                 }
             }
             else {
@@ -9522,13 +9783,14 @@ static jl_llvm_functions_t
                 // `V` is always initialized when it is used.
                 // Ref https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96629
                 Value *V = nullptr;
+                assert(roots.empty());
                 if (val.typ == (jl_value_t*)jl_bottom_type) {
                     if (VN)
                         V = undef_value_for_type(VN->getType());
                     RTindex = UndefValue::get(getInt8Ty(ctx.builder.getContext()));
                 }
                 else if (jl_is_concrete_type(val.typ) || val.constant) {
-                    size_t tindex = get_box_tindex((jl_datatype_t*)val.typ, phiType);
+                    size_t tindex = get_box_tindex((jl_datatype_t*)(val.constant ? jl_typeof(val.constant) : val.typ), phiType);
                     if (tindex == 0) {
                         if (VN)
                             V = boxed(ctx, val);
@@ -9538,7 +9800,7 @@ static jl_llvm_functions_t
                         if (VN)
                             V = Constant::getNullValue(ctx.types().T_prjlvalue);
                         if (dest)
-                            emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, julia_alignment(val.typ));
+                            emit_unbox_store(ctx, val, dest, ctx.tbaa().tbaa_stack, Align(julia_alignment(val.typ)));
                         RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex);
                     }
                 }
@@ -9612,11 +9874,10 @@ static jl_llvm_functions_t
             }
             if (TindexN)
                 TindexN->addIncoming(RTindex, FromBB);
-            if (dest) {
+            if (dest)
                 ctx.builder.CreateLifetimeStart(dest);
-                if (CountTrackedPointers(dest->getAllocatedType()).count)
-                    ctx.builder.CreateStore(Constant::getNullValue(dest->getAllocatedType()), dest);
-            }
+            for (size_t nr = 0; nr < roots.size(); nr++)
+                roots[nr]->addIncoming(Constant::getNullValue(ctx.types().T_prjlvalue), FromBB);
             ctx.builder.ClearInsertionPoint();
         }
     }
@@ -9638,7 +9899,7 @@ static jl_llvm_functions_t
                     // make sure that anything we attempt to call has some inlining info, just in case optimization messed up
                     // (except if we know that it is an intrinsic used in our prologue, which should never have its own debug subprogram)
                     Function *F = call->getCalledFunction();
-                    if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().startswith("julia.") || &I == restTuple)) {
+                    if (!in_prologue || !F || !(F->isIntrinsic() || F->getName().starts_with("julia.") || &I == restTuple)) {
                         I.setDebugLoc(topdebugloc);
                     }
                 }
@@ -9663,15 +9924,19 @@ static jl_llvm_functions_t
 
     if (ctx.vaSlot > 0) {
         // remove VA allocation if we never referenced it
+        assert(ctx.slots[ctx.vaSlot].isSA && ctx.slots[ctx.vaSlot].isArgument);
         Instruction *root = cast_or_null<Instruction>(ctx.slots[ctx.vaSlot].boxroot);
         if (root) {
-            Instruction *store_value = NULL;
             bool have_real_use = false;
             for (Use &U : root->uses()) {
                 User *RU = U.getUser();
                 if (StoreInst *SRU = dyn_cast<StoreInst>(RU)) {
-                    if (!store_value)
-                        store_value = dyn_cast<Instruction>(SRU->getValueOperand());
+                    assert(isa<ConstantPointerNull>(SRU->getValueOperand()) || SRU->getValueOperand() == restTuple);
+                    (void)SRU;
+                }
+                else if (MemSetInst *MSI = dyn_cast<MemSetInst>(RU)) {
+                    assert(MSI->getValue() == ctx.builder.getInt8(0));
+                    (void)MSI;
                 }
                 else if (isa<DbgInfoIntrinsic>(RU)) {
                 }
@@ -9693,7 +9958,6 @@ static jl_llvm_functions_t
                 if (use)
                     use->eraseFromParent();
                 root->eraseFromParent();
-                assert(!store_value || store_value == restTuple);
                 restTuple->eraseFromParent();
             }
         }
@@ -9921,7 +10185,7 @@ void jl_compile_workqueue(
             if (it == params.compiled_functions.end()) {
                 // Reinfer the function. The JIT came along and removed the inferred
                 // method body. See #34993
-                if (policy != CompilationPolicy::Default &&
+                if ((policy != CompilationPolicy::Default || params.params->trim) &&
                     jl_atomic_load_relaxed(&codeinst->inferred) == jl_nothing) {
                     // XXX: SOURCE_MODE_FORCE_SOURCE is wrong here (neither sufficient nor necessary)
                     codeinst = jl_type_infer(codeinst->def, jl_atomic_load_relaxed(&codeinst->max_world), SOURCE_MODE_FORCE_SOURCE);
@@ -9952,6 +10216,16 @@ void jl_compile_workqueue(
         if (proto.specsig) {
             // expected specsig
             if (!preal_specsig) {
+                if (params.params->trim) {
+                    auto it = params.compiled_functions.find(codeinst); //TODO: What to do about this
+                    errs() << "Bailed out to invoke when compiling:";
+                    jl_(codeinst->def);
+                    if (it != params.compiled_functions.end()) {
+                        errs() << it->second.second.functionObject << "\n";
+                        errs() << it->second.second.specFunctionObject << "\n";
+                    } else
+                        errs() << "codeinst not in compile_functions\n";
+                }
                 // emit specsig-to-(jl)invoke conversion
                 StringRef invokeName;
                 if (invoke != NULL)
@@ -10045,7 +10319,6 @@ static void init_jit_functions(void)
     add_named_global(memcmp_func, &memcmp);
     add_named_global(jltypeerror_func, &jl_type_error);
     add_named_global(jlcheckassign_func, &jl_checked_assignment);
-    add_named_global(jldeclareconst_func, &jl_declare_constant);
     add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error);
     add_named_global(jlgetbindingwrorerror_func, &jl_get_binding_wr);
     add_named_global(jlboundp_func, &jl_boundp);
@@ -10059,7 +10332,7 @@ static void init_jit_functions(void)
     add_named_global(jlcopyast_func, &jl_copy_ast);
     //add_named_global(jlnsvec_func, &jl_svec);
     add_named_global(jlmethod_func, &jl_method_def);
-    add_named_global(jlgenericfunction_func, &jl_generic_function_def);
+    add_named_global(jlgenericfunction_func, &jl_declare_const_gf);
     add_named_global(jlenter_func, &jl_enter_handler);
     add_named_global(jl_current_exception_func, &jl_current_exception);
     add_named_global(jlleave_noexcept_func, &jl_pop_handler_noexcept);
@@ -10098,6 +10371,7 @@ static void init_jit_functions(void)
 
 #ifdef _OS_WINDOWS_
 #if defined(_CPU_X86_64_)
+    add_named_global("__julia_personality", &__julia_personality);
 #if defined(_COMPILER_GCC_)
     add_named_global("___chkstk_ms", &___chkstk_ms);
 #else
@@ -10123,7 +10397,7 @@ static void init_jit_functions(void)
 }
 
 #ifdef JL_USE_INTEL_JITEVENTS
-char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier
+char jl_using_intel_jitevents = 0; // Non-zero if running under Intel VTune Amplifier
 #endif
 
 #ifdef JL_USE_OPROFILE_JITEVENTS
@@ -10142,6 +10416,22 @@ int jl_opaque_ptrs_set = 0;
 
 extern "C" void jl_init_llvm(void)
 {
+    jl_default_cgparams = {
+        /* track_allocations */ 1,
+        /* code_coverage */ 1,
+        /* prefer_specsig */ 0,
+#ifdef _OS_WINDOWS_
+        /* gnu_pubnames */ 0,
+#else
+        /* gnu_pubnames */ 1,
+#endif
+        /* debug_info_kind */ (int) DICompileUnit::DebugEmissionKind::FullDebug,
+        /* debug_info_level */ (int) jl_options.debug_level,
+        /* safepoint_on_entry */ 1,
+        /* gcstack_arg */ 1,
+        /* use_jlplt*/ 1,
+        /* trim */ 0,
+        /* lookup */ jl_rettype_inferred_addr };
     jl_page_size = jl_getpagesize();
     jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
     jl_default_cgparams.debug_info_level = (int) jl_options.debug_level;
@@ -10221,9 +10511,6 @@ extern "C" void jl_init_llvm(void)
 #if defined(JL_USE_INTEL_JITEVENTS) || \
     defined(JL_USE_OPROFILE_JITEVENTS) || \
     defined(JL_USE_PERF_JITEVENTS)
-#ifdef JL_USE_JITLINK
-#pragma message("JIT profiling support (JL_USE_*_JITEVENTS) not yet available on platforms that use JITLink")
-#else
     const char *jit_profiling = getenv("ENABLE_JITPROFILING");
 
 #if defined(JL_USE_INTEL_JITEVENTS)
@@ -10240,24 +10527,23 @@ extern "C" void jl_init_llvm(void)
 
 #if defined(JL_USE_PERF_JITEVENTS)
     if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_perf_jitevents= 1;
+        jl_using_perf_jitevents = 1;
     }
 #endif
 
 #ifdef JL_USE_INTEL_JITEVENTS
     if (jl_using_intel_jitevents)
-        jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createIntelJITEventListener());
+        jl_ExecutionEngine->enableIntelJITEventListener();
 #endif
 
 #ifdef JL_USE_OPROFILE_JITEVENTS
     if (jl_using_oprofile_jitevents)
-        jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createOProfileJITEventListener());
+        jl_ExecutionEngine->enableOProfileJITEventListener();
 #endif
 
 #ifdef JL_USE_PERF_JITEVENTS
     if (jl_using_perf_jitevents)
-        jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener());
-#endif
+        jl_ExecutionEngine->enablePerfJITEventListener();
 #endif
 #endif
 
diff --git a/src/coverage.cpp b/src/coverage.cpp
index c061276e66fd9..685370198ff13 100644
--- a/src/coverage.cpp
+++ b/src/coverage.cpp
@@ -207,7 +207,7 @@ extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output)
 {
     if (output) {
         StringRef output_pattern(output);
-        if (output_pattern.endswith(".info"))
+        if (output_pattern.ends_with(".info"))
             write_lcov_data(coverageData, jl_format_filename(output_pattern.str().c_str()));
     }
     else {
diff --git a/src/crc32c.c b/src/crc32c.c
index 1d667b3dbf656..50d2acc603359 100644
--- a/src/crc32c.c
+++ b/src/crc32c.c
@@ -178,6 +178,9 @@ JL_DLLEXPORT uint32_t jl_crc32c(uint32_t crc, const char *buf, size_t len)
     return crc32c_sse42(crc, buf, len);
 }
 #  else
+#if defined(JL_CRC32C_USE_IFUNC) && defined(_COMPILER_CLANG_)
+JL_UNUSED
+#endif
 static crc32c_func_t crc32c_dispatch(void)
 {
     // When used in ifunc, we cannot call external functions (i.e. jl_cpuid)
diff --git a/src/datatype.c b/src/datatype.c
index cb10ef7719dd5..3a2ebf2bb303a 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -20,23 +20,21 @@ extern "C" {
 
 // allocating TypeNames -----------------------------------------------------------
 
-static int is10digit(char c) JL_NOTSAFEPOINT
-{
-    return (c >= '0' && c <= '9');
-}
-
 static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT
 {
     char *n = jl_symbol_name(s);
     if (n[0] != '#')
         return s;
-    char *end = strrchr(n, '#');
+    char *end = strchr(&n[1], '#');
+    // handle `#f...##...#...`
+    if (end != NULL && end[1] == '#')
+        end = strchr(&end[2], '#');
     int32_t len;
-    if (end == n || end == n+1)
+    if (end == NULL || end == n+1)
         len = strlen(n) - 1;
     else
         len = (end-n) - 1;  // extract `f` from `#f#...`
-    if (is10digit(n[1]))
+    if (isdigit(n[1]) || is_canonicalized_anonfn_typename(n))
         return _jl_symbol(n, len+1);
     return _jl_symbol(&n[1], len);
 }
@@ -86,6 +84,7 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
     tn->atomicfields = NULL;
     tn->constfields = NULL;
     tn->max_methods = 0;
+    tn->constprop_heustic = 0;
     return tn;
 }
 
@@ -291,10 +290,6 @@ static jl_datatype_layout_t *jl_get_layout(uint32_t sz,
     if ((void*)ret == HT_NOTFOUND) {
         if (!should_malloc) {
             char *perm_mem = (char *)jl_gc_perm_alloc(flddesc_sz, 0, 4, 0);
-#ifdef MMTK_GC
-    jl_ptls_t ptls = jl_current_task->ptls;
-    mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(perm_mem), flddesc_sz);
-#endif
             assert(perm_mem);
             ret = (jl_datatype_layout_t *)perm_mem;
             memcpy(perm_mem, flddesc, flddesc_sz);
@@ -366,6 +361,8 @@ int jl_struct_try_layout(jl_datatype_t *dt)
 
 int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree)
 {
+    if (jl_typeofbottom_type && ty == jl_typeofbottom_type->super)
+        ty = jl_typeofbottom_type;
     if (ty->name->mayinlinealloc && jl_struct_try_layout(ty)) {
         if (ty->layout->npointers > 0) {
             if (pointerfree)
@@ -945,6 +942,10 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     uint32_t nbytes = (nbits + 7) / 8;
     uint32_t alignm = next_power_of_two(nbytes);
+# if defined(_CPU_X86_) && !defined(_OS_WINDOWS_)
+    if (alignm == 8)
+        alignm = 4;
+# endif
     if (alignm > MAX_ALIGN)
         alignm = MAX_ALIGN;
     // memoize isprimitivetype, since it is much easier than checking
@@ -972,10 +973,6 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
     jl_datatype_layout_t *layout = (jl_datatype_layout_t *)
       jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t),
         0, 4, 0);
-#ifdef MMTK_GC
-    jl_ptls_t ptls = jl_current_task->ptls;
-    mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(layout), sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t));
-#endif
     layout->size = large ? GC_MAX_SZCLASS+1 : 0;
     layout->nfields = 0;
     layout->alignment = sizeof(void *);
@@ -1263,7 +1260,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
     }
     else if (nb == 1) {
         uint8_t *y8 = (uint8_t*)y;
-        assert(!dt->layout->flags.haspadding);
+        assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
         if (dt == et) {
             *y8 = *(uint8_t*)expected;
             uint8_t z8 = *(uint8_t*)src;
@@ -1276,7 +1273,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
     }
     else if (nb == 2) {
         uint16_t *y16 = (uint16_t*)y;
-        assert(!dt->layout->flags.haspadding);
+        assert(dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding);
         if (dt == et) {
             *y16 = *(uint16_t*)expected;
             uint16_t z16 = *(uint16_t*)src;
@@ -1294,7 +1291,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
             uint32_t z32 = zext_read32(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32);
-                if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1311,7 +1308,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
             uint64_t z64 = zext_read64(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64);
-                if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1329,7 +1326,7 @@ JL_DLLEXPORT int jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_value_t *y /* pre-
             jl_uint128_t z128 = zext_read128(src, nb);
             while (1) {
                 success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128);
-                if (success || !dt->layout->flags.haspadding || !jl_egal__bits(y, expected, dt))
+                if (success || (dt->layout->flags.isbitsegal && !dt->layout->flags.haspadding) || !jl_egal__bits(y, expected, dt))
                     break;
             }
         }
@@ -1665,6 +1662,8 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 {
     jl_task_t *ct = jl_current_task;
     if (!jl_is_datatype(type) || !type->isconcretetype || type->layout == NULL || jl_is_layout_opaque(type->layout)) {
+        if (type == jl_typeofbottom_type->super)
+            return jl_bottom_type; // ::Type{Union{}} is an abstract type, but is also a singleton when used as a field type
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
     }
     if (type->instance != NULL)
@@ -2023,7 +2022,7 @@ inline jl_value_t *modify_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value_
         else {
             char *px = lock(p, parent, needlock, isatomic);
             int success = memcmp(px, (char*)r, fsz) == 0;
-            if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
+            if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
                 success = jl_egal__bits((jl_value_t*)px, r, (jl_datatype_t*)rty);
             if (success) {
                 if (isunion) {
@@ -2138,7 +2137,7 @@ inline jl_value_t *replace_bits(jl_value_t *ty, char *p, uint8_t *psel, jl_value
         success = (rty == jl_typeof(expected));
         if (success) {
             success = memcmp((char*)r, (char*)expected, rsz) == 0;
-            if (!success && ((jl_datatype_t*)rty)->layout->flags.haspadding)
+            if (!success && (!((jl_datatype_t*)rty)->layout->flags.isbitsegal || ((jl_datatype_t*)rty)->layout->flags.haspadding))
                 success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
         }
         *((uint8_t*)r + fsz) = success ? 1 : 0;
@@ -2197,7 +2196,7 @@ inline int setonce_bits(jl_datatype_t *rty, char *p, jl_value_t *parent, jl_valu
     }
     else {
         char *px = lock(p, parent, needlock, isatomic);
-        success = undefref_check(rty, (jl_value_t*)px) != NULL;
+        success = undefref_check(rty, (jl_value_t*)px) == NULL;
         if (success)
             memassign_safe(hasptr, px, rhs, fsz);
         unlock(p, parent, needlock, isatomic);
diff --git a/src/debug-registry.h b/src/debug-registry.h
index f30049eb5b210..4c9e13d8cd72d 100644
--- a/src/debug-registry.h
+++ b/src/debug-registry.h
@@ -99,18 +99,26 @@ class JITDebugInfoRegistry
     };
 private:
 
-    struct ObjectInfo {
-        const llvm::object::ObjectFile *object = nullptr;
-        size_t SectionSize = 0;
-        ptrdiff_t slide = 0;
-        llvm::object::SectionRef Section{};
-        llvm::DIContext *context = nullptr;
+    struct LazyObjectInfo {
+        SmallVector<uint8_t, 0> data;
+        size_t uncompressedsize;
+        std::unique_ptr<const llvm::object::ObjectFile> object;
+        std::unique_ptr<llvm::DIContext> context;
+        LazyObjectInfo() = delete;
+    };
+
+    struct SectionInfo {
+        LazyObjectInfo *object;
+        size_t SectionSize;
+        ptrdiff_t slide;
+        uint64_t SectionIndex;
+        SectionInfo() = delete;
     };
 
     template<typename KeyT, typename ValT>
     using rev_map = std::map<KeyT, ValT, std::greater<KeyT>>;
 
-    typedef rev_map<size_t, ObjectInfo> objectmap_t;
+    typedef rev_map<size_t, SectionInfo> objectmap_t;
     typedef rev_map<uint64_t, objfileentry_t> objfilemap_t;
 
     objectmap_t objectmap{};
@@ -137,8 +145,7 @@ class JITDebugInfoRegistry
     void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT;
     jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT;
     void registerJITObject(const llvm::object::ObjectFile &Object,
-                        std::function<uint64_t(const llvm::StringRef &)> getLoadAddress,
-                        std::function<void*(void*)> lookupWriteAddress);
+                        std::function<uint64_t(const llvm::StringRef &)> getLoadAddress);
     objectmap_t& getObjectMap() JL_NOTSAFEPOINT;
     void add_image_info(image_info_t info) JL_NOTSAFEPOINT;
     bool get_image_info(uint64_t base, image_info_t *info) const JL_NOTSAFEPOINT;
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index 5cb7401a036d4..cfaf8d4c70ee9 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -7,6 +7,7 @@
 #include <llvm/DebugInfo/DWARF/DWARFContext.h>
 #include <llvm/Object/SymbolSize.h>
 #include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/MemoryBufferRef.h>
 #include <llvm/IR/Function.h>
 #include <llvm/ADT/StringRef.h>
 #include <llvm/ADT/StringMap.h>
@@ -222,11 +223,21 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam
 #endif
 
 void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
-                        std::function<uint64_t(const StringRef &)> getLoadAddress,
-                        std::function<void*(void*)> lookupWriteAddress)
+                        std::function<uint64_t(const StringRef &)> getLoadAddress)
 {
     object::section_iterator EndSection = Object.section_end();
 
+    bool anyfunctions = false;
+    for (const object::SymbolRef &sym_iter : Object.symbols()) {
+        object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType());
+        if (SymbolType != object::SymbolRef::ST_Function)
+            continue;
+        anyfunctions = true;
+        break;
+    }
+    if (!anyfunctions)
+        return;
+
 #ifdef _CPU_ARM_
     // ARM does not have/use .eh_frame
     uint64_t arm_exidx_addr = 0;
@@ -280,14 +291,13 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
 #if defined(_OS_WINDOWS_)
     uint64_t SectionAddrCheck = 0;
     uint64_t SectionLoadCheck = 0; (void)SectionLoadCheck;
-    uint64_t SectionWriteCheck = 0; (void)SectionWriteCheck;
     uint8_t *UnwindData = NULL;
 #if defined(_CPU_X86_64_)
     uint8_t *catchjmp = NULL;
     for (const object::SymbolRef &sym_iter : Object.symbols()) {
         StringRef sName = cantFail(sym_iter.getName());
         if (sName.equals("__UnwindData") || sName.equals("__catchjmp")) {
-            uint64_t Addr = cantFail(sym_iter.getAddress());
+            uint64_t Addr = cantFail(sym_iter.getAddress()); // offset into object (including section offset)
             auto Section = cantFail(sym_iter.getSection());
             assert(Section != EndSection && Section->isText());
             uint64_t SectionAddr = Section->getAddress();
@@ -299,10 +309,7 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
                         SectionLoadCheck == SectionLoadAddr);
             SectionAddrCheck = SectionAddr;
             SectionLoadCheck = SectionLoadAddr;
-            SectionWriteCheck = SectionLoadAddr;
-            if (lookupWriteAddress)
-                SectionWriteCheck = (uintptr_t)lookupWriteAddress((void*)SectionLoadAddr);
-            Addr += SectionWriteCheck - SectionLoadCheck;
+            Addr += SectionLoadAddr - SectionAddr;
             if (sName.equals("__UnwindData")) {
                 UnwindData = (uint8_t*)Addr;
             }
@@ -313,30 +320,16 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
     }
     assert(catchjmp);
     assert(UnwindData);
-    assert(SectionAddrCheck);
     assert(SectionLoadCheck);
-    assert(!memcmp(catchjmp, "\0\0\0\0\0\0\0\0\0\0\0\0", 12) &&
-            !memcmp(UnwindData, "\0\0\0\0\0\0\0\0\0\0\0\0", 12));
-    catchjmp[0] = 0x48;
-    catchjmp[1] = 0xb8; // mov RAX, QWORD PTR [&__julia_personality]
-    *(uint64_t*)(&catchjmp[2]) = (uint64_t)&__julia_personality;
-    catchjmp[10] = 0xff;
-    catchjmp[11] = 0xe0; // jmp RAX
-    UnwindData[0] = 0x09; // version info, UNW_FLAG_EHANDLER
-    UnwindData[1] = 4;    // size of prolog (bytes)
-    UnwindData[2] = 2;    // count of unwind codes (slots)
-    UnwindData[3] = 0x05; // frame register (rbp) = rsp
-    UnwindData[4] = 4;    // second instruction
-    UnwindData[5] = 0x03; // mov RBP, RSP
-    UnwindData[6] = 1;    // first instruction
-    UnwindData[7] = 0x50; // push RBP
-    *(DWORD*)&UnwindData[8] = (DWORD)(catchjmp - (uint8_t*)SectionWriteCheck); // relative location of catchjmp
-    UnwindData -= SectionWriteCheck - SectionLoadCheck;
 #endif // defined(_OS_X86_64_)
 #endif // defined(_OS_WINDOWS_)
 
+    SmallVector<uint8_t, 0> packed;
+    compression::zlib::compress(ArrayRef<uint8_t>((uint8_t*)Object.getData().data(), Object.getData().size()), packed, compression::zlib::DefaultCompression);
+    jl_jit_add_bytes(packed.size());
+    auto ObjectCopy = new LazyObjectInfo{packed, Object.getData().size()}; // intentionally leaked so that we don't need to ref-count it, intentionally copied so that we exact-size the allocation (since no shrink_to_fit function)
     auto symbols = object::computeSymbolSizes(Object);
-    bool first = true;
+    bool hassection = false;
     for (const auto &sym_size : symbols) {
         const object::SymbolRef &sym_iter = sym_size.first;
         object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType());
@@ -348,7 +341,7 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
         uint64_t SectionAddr = Section->getAddress();
         StringRef secName = cantFail(Section->getName());
         uint64_t SectionLoadAddr = getLoadAddress(secName);
-        Addr -= SectionAddr - SectionLoadAddr;
+        Addr += SectionLoadAddr - SectionAddr;
         StringRef sName = cantFail(sym_iter.getName());
         uint64_t SectionSize = Section->getSize();
         size_t Size = sym_size.second;
@@ -385,24 +378,23 @@ void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
         jl_profile_atomic([&]() JL_NOTSAFEPOINT {
             if (mi)
                 linfomap[Addr] = std::make_pair(Size, mi);
-            if (first) {
-                objectmap[SectionLoadAddr] = {&Object,
-                    (size_t)SectionSize,
-                    (ptrdiff_t)(SectionAddr - SectionLoadAddr),
-                    *Section,
-                    nullptr,
-                    };
-                first = false;
-            }
+            hassection = true;
+            objectmap.insert(std::pair{SectionLoadAddr, SectionInfo{
+                ObjectCopy,
+                (size_t)SectionSize,
+                (ptrdiff_t)(SectionAddr - SectionLoadAddr),
+                Section->getIndex()
+                }});
         });
     }
+    if (!hassection) // clang-sa demands that we do this to fool cplusplus.NewDeleteLeaks
+        delete ObjectCopy;
 }
 
 void jl_register_jit_object(const object::ObjectFile &Object,
-                            std::function<uint64_t(const StringRef &)> getLoadAddress,
-                            std::function<void *(void *)> lookupWriteAddress)
+                            std::function<uint64_t(const StringRef &)> getLoadAddress)
 {
-    getJITDebugRegistry().registerJITObject(Object, getLoadAddress, lookupWriteAddress);
+    getJITDebugRegistry().registerJITObject(Object, getLoadAddress);
 }
 
 // TODO: convert the safe names from aotcomile.cpp:makeSafeName back into symbols
@@ -1213,11 +1205,33 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
     auto fit = objmap.lower_bound(fptr);
     if (fit != objmap.end() && fptr < fit->first + fit->second.SectionSize) {
         *slide = fit->second.slide;
-        *Section = fit->second.Section;
-        if (context) {
-            if (fit->second.context == nullptr)
-                fit->second.context = DWARFContext::create(*fit->second.object).release();
-            *context = fit->second.context;
+        auto lazyobject = fit->second.object;
+        if (!lazyobject->object && !lazyobject->data.empty()) {
+            if (lazyobject->uncompressedsize) {
+                SmallVector<uint8_t, 0> unpacked;
+                Error E = compression::zlib::decompress(lazyobject->data, unpacked, lazyobject->uncompressedsize);
+                if (E)
+                    lazyobject->data.clear();
+                else
+                    lazyobject->data = std::move(unpacked);
+                jl_jit_add_bytes(lazyobject->data.size() - lazyobject->uncompressedsize);
+                lazyobject->uncompressedsize = 0;
+            }
+            if (!lazyobject->data.empty()) {
+                auto obj = object::ObjectFile::createObjectFile(MemoryBufferRef(StringRef((const char*)lazyobject->data.data(), lazyobject->data.size()), "jit.o"));
+                if (obj)
+                    lazyobject->object = std::move(*obj);
+                else
+                    lazyobject->data.clear();
+            }
+        }
+        if (lazyobject->object) {
+            *Section = *std::next(lazyobject->object->section_begin(), fit->second.SectionIndex);
+            if (context) {
+                if (lazyobject->context == nullptr)
+                    lazyobject->context = DWARFContext::create(*lazyobject->object);
+                *context = lazyobject->context.get();
+            }
         }
         found = 1;
     }
diff --git a/src/debuginfo.h b/src/debuginfo.h
index 5b5cdcb82d534..6cd7528910765 100644
--- a/src/debuginfo.h
+++ b/src/debuginfo.h
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 // Declarations for debuginfo.cpp
+void jl_jit_add_bytes(size_t bytes) JL_NOTSAFEPOINT;
 
 int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
         llvm::object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT;
diff --git a/src/disasm.cpp b/src/disasm.cpp
index b24c374607113..b71503c3f7a77 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -1224,7 +1224,11 @@ jl_value_t *jl_dump_function_asm_impl(jl_llvmf_dump_t* dump, char emit_mc, const
         addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
         if (emit_mc) {
             raw_svector_ostream obj_OS(ObjBufferSV);
+#if JL_LLVM_VERSION >= 180000
+            if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CodeGenFileType::ObjectFile, false, nullptr))
+#else
             if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr))
+#endif
                 return jl_an_empty_string;
             TSM->withModuleDo([&](Module &m) { PM.run(m); });
         }
diff --git a/src/dlload.c b/src/dlload.c
index 484c36a228886..91980cc4ecbbf 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -309,7 +309,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     */
     if (!abspath && !is_atpath && jl_base_module != NULL) {
         jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH"), 0);
-        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_atomic_load_relaxed(&b->value) : NULL);
+        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_get_binding_value(b) : NULL);
         if (DL_LOAD_PATH != NULL) {
             size_t j;
             for (j = 0; j < jl_array_nrows(DL_LOAD_PATH); j++) {
diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h
index 669753a9f5302..f8dd1cfd81ed0 100644
--- a/src/flisp/flisp.h
+++ b/src/flisp/flisp.h
@@ -158,7 +158,7 @@ value_t fl_cons(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT;
 value_t fl_list2(fl_context_t *fl_ctx, value_t a, value_t b) JL_NOTSAFEPOINT;
 value_t fl_listn(fl_context_t *fl_ctx, size_t n, ...) JL_NOTSAFEPOINT;
 value_t symbol(fl_context_t *fl_ctx, const char *str) JL_NOTSAFEPOINT;
-char *symbol_name(fl_context_t *fl_ctx, value_t v);
+char *symbol_name(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
 int fl_is_keyword_name(const char *str, size_t len);
 value_t alloc_vector(fl_context_t *fl_ctx, size_t n, int init);
 size_t llength(value_t v);
diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp
index c7ee32269138a..5b462d48cd2de 100644
--- a/src/gc-alloc-profiler.cpp
+++ b/src/gc-alloc-profiler.cpp
@@ -3,7 +3,6 @@
 #include "gc-alloc-profiler.h"
 
 #include "julia_internal.h"
-#include "gc.h"
 
 #include "llvm/ADT/SmallVector.h"
 
diff --git a/src/gc-common.c b/src/gc-common.c
index 98ef3f62125f9..6ce455d3923ad 100644
--- a/src/gc-common.c
+++ b/src/gc-common.c
@@ -1,90 +1,135 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "julia.h"
+#include "julia_atomics.h"
+#include "julia_gcext.h"
+#include "julia_assert.h"
+#include "threading.h"
+#ifdef __GLIBC__
+#include <malloc.h> // for malloc_trim
+#endif
 
-jl_gc_num_t gc_num = {0};
-gc_heapstatus_t gc_heap_stats = {0};
-size_t last_long_collect_interval;
-int gc_n_threads;
-jl_ptls_t* gc_all_tls_states;
-// `tid` of first GC thread
-int gc_first_tid;
+#ifdef __cplusplus
+extern "C" {
+#endif
 
-int64_t live_bytes = 0;
-// global variables for GC stats
-uint64_t freed_in_runtime = 0;
+// =========================================================================== //
+// GC Metrics
+// =========================================================================== //
 
-// These should be moved to gc.c
+jl_gc_num_t gc_num = {0};
 
-// Number of GC threads that may run parallel marking
-int jl_n_markthreads;
-// Number of GC threads that may run concurrent sweeping (0 or 1)
-int jl_n_sweepthreads;
+JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
+{
+    return gc_num.total_time;
+}
 
-JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
+// =========================================================================== //
+// GC Callbacks
+// =========================================================================== //
 
-// mutex for gc-heap-snapshot.
-jl_mutex_t heapsnapshot_lock;
+jl_gc_callback_list_t *gc_cblist_root_scanner;
+jl_gc_callback_list_t *gc_cblist_task_scanner;
+jl_gc_callback_list_t *gc_cblist_pre_gc;
+jl_gc_callback_list_t *gc_cblist_post_gc;
+jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
+jl_gc_callback_list_t *gc_cblist_notify_external_free;
+jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
 
-const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
-JL_DLLEXPORT uintptr_t jl_get_buff_tag(void)
+static void jl_gc_register_callback(jl_gc_callback_list_t **list,
+        jl_gc_cb_func_t func)
 {
-    return jl_buff_tag;
+    while (*list != NULL) {
+        if ((*list)->func == func)
+            return;
+        list = &((*list)->next);
+    }
+    *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t));
+    (*list)->next = NULL;
+    (*list)->func = func;
 }
 
-// GC knobs and self-measurement variables
-int64_t last_gc_total_bytes = 0;
+static void jl_gc_deregister_callback(jl_gc_callback_list_t **list,
+        jl_gc_cb_func_t func)
+{
+    while (*list != NULL) {
+        if ((*list)->func == func) {
+            jl_gc_callback_list_t *tmp = *list;
+            (*list) = (*list)->next;
+            free(tmp);
+            return;
+        }
+        list = &((*list)->next);
+    }
+}
 
-// max_total_memory is a suggestion.  We try very hard to stay
-// under this limit, but we will go above it rather than halting.
-#ifdef _P64
-const size_t default_collect_interval = 5600 * 1024 * sizeof(void*);
-size_t total_mem;
-// We expose this to the user/ci as jl_gc_set_max_memory
-memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024;
-#else
-typedef uint32_t memsize_t;
-const size_t default_collect_interval = 3200 * 1024 * sizeof(void*);
-// Work really hard to stay within 2GB
-// Alternative is to risk running out of address space
-// on 32 bit architectures.
-#define MAX32HEAP 1536 * 1024 * 1024
-memsize_t max_total_memory = (memsize_t) MAX32HEAP;
-#endif
-uint64_t old_alloc_diff = default_collect_interval;
-uint64_t old_freed_diff = default_collect_interval;
+JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
+}
 
+JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
+}
 
-// finalizers
-// ---
-uint64_t finalizer_rngState[JL_RNG_SIZE];
-jl_mutex_t finalizers_lock;
-// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
-// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
-// If an object pointer has the second lowest bit set, the current pointer is a c object pointer.
-//   It must be aligned at least 4, and it finalized immediately (at "quiescence").
-// `to_finalize` should not have tagged pointers.
-arraylist_t finalizer_list_marked;
-arraylist_t to_finalize;
+JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
+}
 
-void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
+}
 
-JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
+JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable)
 {
-    jl_rng_split(finalizer_rngState, jl_current_task->rngState);
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
 }
 
-// The first two entries are assumed to be empty and the rest are assumed to
-// be pointers to `jl_value_t` objects
-STATIC_INLINE void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable)
 {
-    void **items = list->items;
-    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
-    items[1] = ct->gcstack;
-    ct->gcstack = (jl_gcframe_t*)items;
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
+}
+
+JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
+{
+    if (enable)
+        jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
+    else
+        jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
 }
 
-STATIC_INLINE void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
+// =========================================================================== //
+// Finalization
+// =========================================================================== //
+
+jl_mutex_t finalizers_lock;
+arraylist_t finalizer_list_marked;
+arraylist_t to_finalize;
+JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
+
+void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
 {
     arraylist_push(&to_finalize, o);
     arraylist_push(&to_finalize, f);
@@ -115,38 +160,72 @@ void run_finalizer(jl_task_t *ct, void *o, void *ff)
     }
 }
 
-void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
+// if `need_sync` is true, the `list` is the `finalizers` list of another
+// thread and we need additional synchronizations
+static void finalize_object(arraylist_t *list, jl_value_t *o,
+                            arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
 {
-    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
-    arraylist_t *a = &ptls->finalizers;
-    // This acquire load and the release store at the end are used to
-    // synchronize with `finalize_object` on another thread. Apart from the GC,
-    // which is blocked by entering a unsafe region, there might be only
-    // one other thread accessing our list in `finalize_object`
-    // (only one thread since it needs to acquire the finalizer lock).
-    // Similar to `finalize_object`, all content mutation has to be done
-    // between the acquire and the release of the length.
-    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
-    if (__unlikely(oldlen + 2 > a->max)) {
-        JL_LOCK_NOGC(&finalizers_lock);
-        // `a->len` might have been modified.
-        // Another possibility is to always grow the array to `oldlen + 2` but
-        // it's simpler this way and uses slightly less memory =)
-        oldlen = a->len;
-        arraylist_grow(a, 2);
-        a->len = oldlen;
-        JL_UNLOCK_NOGC(&finalizers_lock);
+    // The acquire load makes sure that the first `len` objects are valid.
+    // If `need_sync` is true, all mutations of the content should be limited
+    // to the first `oldlen` elements and no mutation is allowed after the
+    // new length is published with the `cmpxchg` at the end of the function.
+    // This way, the mutation should not conflict with the owning thread,
+    // which only writes to locations later than `len`
+    // and will not resize the buffer without acquiring the lock.
+    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
+    size_t oldlen = len;
+    void **items = list->items;
+    size_t j = 0;
+    for (size_t i = 0; i < len; i += 2) {
+        void *v = items[i];
+        int move = 0;
+        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
+            void *f = items[i + 1];
+            move = 1;
+            arraylist_push(copied_list, v);
+            arraylist_push(copied_list, f);
+        }
+        if (move || __unlikely(!v)) {
+            // remove item
+        }
+        else {
+            if (j < i) {
+                items[j] = items[i];
+                items[j+1] = items[i+1];
+            }
+            j += 2;
+        }
     }
-    void **items = a->items;
-    items[oldlen] = v;
-    items[oldlen + 1] = f;
-    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
+    len = j;
+    if (oldlen == len)
+        return;
+    if (need_sync) {
+        // The memset needs to be unconditional since the thread might have
+        // already read the length.
+        // The `memset` (like any other content mutation) has to be done
+        // **before** the `cmpxchg` which publishes the length.
+        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
+        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
+    }
+    else {
+        list->len = len;
+    }
+}
+
+// The first two entries are assumed to be empty and the rest are assumed to
+// be pointers to `jl_value_t` objects
+static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT
+{
+    void **items = list->items;
+    items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
+    items[1] = ct->gcstack;
+    ct->gcstack = (jl_gcframe_t*)items;
 }
 
 // Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
 // to be hold for the current thread and will release the lock when the
 // function returns.
-void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
+static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPOINT_LEAVE
 {
     // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring
     // of `finalizer`) in a finalizer:
@@ -168,6 +247,15 @@ void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list) JL_NOTSAFEPO
     ct->sticky = sticky;
 }
 
+static uint64_t finalizer_rngState[JL_RNG_SIZE];
+
+void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void jl_gc_init_finalizer_rng_state(void)
+{
+    jl_rng_split(finalizer_rngState, jl_current_task->rngState);
+}
+
 void run_finalizers(jl_task_t *ct, int finalizers_thread)
 {
     // Racy fast path:
@@ -204,123 +292,73 @@ void run_finalizers(jl_task_t *ct, int finalizers_thread)
     memcpy(&ct->rngState[0], &save_rngState[0], sizeof(save_rngState));
 }
 
-// if `need_sync` is true, the `list` is the `finalizers` list of another
-// thread and we need additional synchronizations
-void finalize_object(arraylist_t *list, jl_value_t *o,
-                            arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
 {
-    // The acquire load makes sure that the first `len` objects are valid.
-    // If `need_sync` is true, all mutations of the content should be limited
-    // to the first `oldlen` elements and no mutation is allowed after the
-    // new length is published with the `cmpxchg` at the end of the function.
-    // This way, the mutation should not conflict with the owning thread,
-    // which only writes to locations later than `len`
-    // and will not resize the buffer without acquiring the lock.
-    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
-    size_t oldlen = len;
-    void **items = list->items;
-    size_t j = 0;
-    for (size_t i = 0; i < len; i += 2) {
-        void *v = items[i];
-        int move = 0;
-        if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
-            void *f = items[i + 1];
-            move = 1;
-            arraylist_push(copied_list, v);
-            arraylist_push(copied_list, f);
-        }
-        if (move || __unlikely(!v)) {
-            // remove item
-        }
-        else {
-            if (j < i) {
-                items[j] = items[i];
-                items[j+1] = items[i+1];
-            }
-            j += 2;
-        }
-    }
-    len = j;
-    if (oldlen == len)
-        return;
-    if (need_sync) {
-        // The memset needs to be unconditional since the thread might have
-        // already read the length.
-        // The `memset` (like any other content mutation) has to be done
-        // **before** the `cmpxchg` which publishes the length.
-        memset(&items[len], 0, (oldlen - len) * sizeof(void*));
-        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
-    }
-    else {
-        list->len = len;
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0 && ptls->engine_nqueued == 0) {
+        run_finalizers(ct, 0);
     }
 }
 
-JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
 {
-    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
+    if (ptls == NULL)
+        ptls = jl_current_task->ptls;
+    return ptls->finalizers_inhibited;
 }
 
-// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
-JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
 {
-    assert(!gc_ptr_tag(v, 3));
-    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
+    jl_ptls_t ptls = jl_current_task->ptls;
+    ptls->finalizers_inhibited++;
 }
 
-JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
 {
-    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
-        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
-    }
-    else {
-        jl_gc_add_finalizer_(ptls, v, f);
-    }
+    jl_task_t *ct = jl_current_task;
+#ifdef NDEBUG
+    ct->ptls->finalizers_inhibited--;
+#else
+    jl_gc_enable_finalizers(ct, 1);
+#endif
 }
 
-JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
+JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
 {
     if (ct == NULL)
         ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
-    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0 && ptls->engine_nqueued == 0) {
-        run_finalizers(ct, 0);
+    int old_val = ptls->finalizers_inhibited;
+    int new_val = old_val + (on ? -1 : 1);
+    if (new_val < 0) {
+        JL_TRY {
+            jl_error(""); // get a backtrace
+        }
+        JL_CATCH {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
+            // Only print the backtrace once, to avoid spamming the logs
+            static int backtrace_printed = 0;
+            if (backtrace_printed == 0) {
+                backtrace_printed = 1;
+                jlbacktrace(); // written to STDERR_FILENO
+            }
+        }
+        return;
+    }
+    ptls->finalizers_inhibited = new_val;
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
+        jl_gc_run_pending_finalizers(ct);
     }
 }
 
-JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
+JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void)
 {
-    JL_LOCK_NOGC(&finalizers_lock);
-    // Copy the finalizers into a temporary list so that code in the finalizer
-    // won't change the list as we loop through them.
-    // This list is also used as the GC frame when we are running the finalizers
-    arraylist_t copied_list;
-    arraylist_new(&copied_list, 0);
-    // No need to check the to_finalize list since the user is apparently
-    // still holding a reference to the object
-    int gc_n_threads;
-    jl_ptls_t* gc_all_tls_states;
-    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
-    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
-    }
-    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
-    gc_n_threads = 0;
-    gc_all_tls_states = NULL;
-    if (copied_list.len > 0) {
-        // This releases the finalizers lock.
-        jl_gc_run_finalizers_in_list(ct, &copied_list);
-    }
-    else {
-        JL_UNLOCK_NOGC(&finalizers_lock);
-    }
-    arraylist_free(&copied_list);
+    return jl_current_task->ptls->in_finalizer;
 }
 
-void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
+static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
 {
     void **items = flist->items;
     size_t len = flist->len;
@@ -354,360 +392,117 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
     run_finalizers(ct, 1);
 }
 
-JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
-{
-    if (ptls == NULL)
-        ptls = jl_current_task->ptls;
-    return ptls->finalizers_inhibited;
-}
-
-JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    ptls->finalizers_inhibited++;
-}
-
-JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
-{
-    jl_task_t *ct = jl_current_task;
-#ifdef NDEBUG
-    ct->ptls->finalizers_inhibited--;
-#else
-    jl_gc_enable_finalizers(ct, 1);
-#endif
-}
-
-JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
+void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
-    if (ct == NULL)
-        ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    int old_val = ptls->finalizers_inhibited;
-    int new_val = old_val + (on ? -1 : 1);
-    if (new_val < 0) {
-        JL_TRY {
-            jl_error(""); // get a backtrace
-        }
-        JL_CATCH {
-            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
-            // Only print the backtrace once, to avoid spamming the logs
-            static int backtrace_printed = 0;
-            if (backtrace_printed == 0) {
-                backtrace_printed = 1;
-                jlbacktrace(); // written to STDERR_FILENO
-            }
-        }
-        return;
-    }
-    ptls->finalizers_inhibited = new_val;
-    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
-        jl_gc_run_pending_finalizers(ct);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
+    arraylist_t *a = &ptls->finalizers;
+    // This acquire load and the release store at the end are used to
+    // synchronize with `finalize_object` on another thread. Apart from the GC,
+    // which is blocked by entering a unsafe region, there might be only
+    // one other thread accessing our list in `finalize_object`
+    // (only one thread since it needs to acquire the finalizer lock).
+    // Similar to `finalize_object`, all content mutation has to be done
+    // between the acquire and the release of the length.
+    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
+    if (__unlikely(oldlen + 2 > a->max)) {
+        JL_LOCK_NOGC(&finalizers_lock);
+        // `a->len` might have been modified.
+        // Another possibility is to always grow the array to `oldlen + 2` but
+        // it's simpler this way and uses slightly less memory =)
+        oldlen = a->len;
+        arraylist_grow(a, 2);
+        a->len = oldlen;
+        JL_UNLOCK_NOGC(&finalizers_lock);
     }
+    void **items = a->items;
+    items[oldlen] = v;
+    items[oldlen + 1] = f;
+    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
 }
 
-JL_DLLEXPORT int8_t jl_gc_is_in_finalizer(void)
-{
-    return jl_current_task->ptls->in_finalizer;
-}
-
-// allocation
-// ---
-
-JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
-{
-    return jl_gc_alloc_(ptls, sz, ty);
-}
-
-// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
-JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type)
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
 {
-    jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
-    maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type);
-    return val;
-}
-
-// This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being inlined into
-// its callers. We provide an external-facing interface for callers, and inline `jl_gc_big_alloc_inner`
-// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) {
-    return jl_gc_big_alloc_inner(ptls, sz);
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
 }
 
-// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, int osize, jl_value_t* type)
+// schedule f(v) to call at the next quiescent interval (aka after the next safepoint/region on all threads)
+JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT
 {
-    jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
-    maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
-    return val;
-}
-
-// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into
-// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner`
-// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) {
-    return jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+    assert(!gc_ptr_tag(v, 3));
+    jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 3), f);
 }
 
-
-// tracking Memorys with malloc'd storage
-STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
 {
-    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz;
-    if (alloc_acc < 16*1024)
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc);
-    else {
-        jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc);
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0);
-    }
-}
-
-
-void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
-    // This is **NOT** a GC safe point.
-    mallocarray_t *ma;
-    if (ptls->gc_tls.heap.mafreelist == NULL) {
-        ma = (mallocarray_t*)malloc_s(sizeof(mallocarray_t));
+    if (__unlikely(jl_typetagis(f, jl_voidpointer_type))) {
+        jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
     }
     else {
-        ma = ptls->gc_tls.heap.mafreelist;
-        ptls->gc_tls.heap.mafreelist = ma->next;
-    }
-    ma->a = (jl_value_t*)((uintptr_t)m | !!isaligned);
-    ma->next = ptls->gc_tls.heap.mallocarrays;
-    ptls->gc_tls.heap.mallocarrays = ma;
-}
-
-JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void)
-{
-    int n_threads = jl_atomic_load_acquire(&jl_n_threads);
-    jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    int64_t pool_live_bytes = 0;
-    for (int i = 0; i < n_threads; i++) {
-        jl_ptls_t ptls2 = all_tls_states[i];
-        if (ptls2 != NULL) {
-            pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes);
-        }
+        jl_gc_add_finalizer_(ptls, v, f);
     }
-    return pool_live_bytes;
 }
 
-void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
 {
     jl_ptls_t ptls = jl_current_task->ptls;
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz);
-    jl_batch_accum_heap_size(ptls, sz);
-}
-
-void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
-{
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc) + sz);
-}
-
-void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
-{
-    jl_batch_accum_free_size(jl_current_task->ptls, sz);
-}
-
-void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
-{
-    assert(jl_is_genericmemory(v));
-    jl_genericmemory_t *m = (jl_genericmemory_t*)v;
-    assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
-    char *d = (char*)m->ptr;
-    if (isaligned)
-        jl_free_aligned(d);
-    else
-        free(d);
-    jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
-        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m));
-    gc_num.freed += jl_genericmemory_nbytes(m);
-    gc_num.freecall++;
+    jl_gc_add_finalizer_th(ptls, v, f);
 }
 
-// GCNum, statistics manipulation
-// ---
-// Only safe to update the heap inside the GC
-void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
-{
-    int gc_n_threads;
-    jl_ptls_t* gc_all_tls_states;
-    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
-    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
-    for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls = gc_all_tls_states[i];
-        if (ptls) {
-            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval);
-            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc);
-            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc);
-            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc);
-            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc);
-            dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc);
-            if (update_heap) {
-                uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc);
-                freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.free_acc);
-                jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
-                jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0);
-                jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0);
-            }
-        }
-    }
-}
-void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
 {
+    JL_LOCK_NOGC(&finalizers_lock);
+    // Copy the finalizers into a temporary list so that code in the finalizer
+    // won't change the list as we loop through them.
+    // This list is also used as the GC frame when we are running the finalizers
+    arraylist_t copied_list;
+    arraylist_new(&copied_list, 0);
+    // No need to check the to_finalize list since the user is apparently
+    // still holding a reference to the object
     int gc_n_threads;
     jl_ptls_t* gc_all_tls_states;
     gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
     gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
     for (int i = 0; i < gc_n_threads; i++) {
-        jl_ptls_t ptls = gc_all_tls_states[i];
-        if (ptls != NULL) {
-            // don't reset `pool_live_bytes` here
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0);
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.free_acc, 0);
-        }
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (ptls2 != NULL)
+            finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
     }
-}
-
-static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
-{
-    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
-    return live_bytes += inc;
-}
-
-void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
-{
-    combine_thread_gc_counts(&gc_num, 0);
-    inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
-    gc_num.allocd = 0;
-    gc_num.deferred_alloc = 0;
-    reset_thread_gc_counts();
-}
-
-size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
-{
-    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout;
-    size_t sz = layout->size * m->length;
-    if (layout->flags.arrayelem_isunion)
-        // account for isbits Union array selector bytes
-        sz += m->length;
-    return sz;
-}
-
-// GC control
-// ---
-_Atomic(uint32_t) jl_gc_disable_counter = 1;
-
-JL_DLLEXPORT int jl_gc_enable(int on)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    int prev = !ptls->disable_gc;
-    ptls->disable_gc = (on == 0);
-    if (on && !prev) {
-        // disable -> enable
-        if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
-            gc_num.allocd += gc_num.deferred_alloc;
-            gc_num.deferred_alloc = 0;
-        }
+    finalize_object(&finalizer_list_marked, o, &copied_list, 0);
+    if (copied_list.len > 0) {
+        // This releases the finalizers lock.
+        jl_gc_run_finalizers_in_list(ct, &copied_list);
     }
-    else if (prev && !on) {
-        // enable -> disable
-        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
-        // check if the GC is running and wait for it to finish
-        jl_gc_safepoint_(ptls);
+    else {
+        JL_UNLOCK_NOGC(&finalizers_lock);
     }
-    return prev;
-}
-
-JL_DLLEXPORT int jl_gc_is_enabled(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return !ptls->disable_gc;
-}
-
-int gc_logging_enabled = 0;
-
-JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
-    gc_logging_enabled = enable;
-}
-
-JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
-    return gc_logging_enabled;
-}
-
-JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
-{
-    jl_gc_num_t num = gc_num;
-    combine_thread_gc_counts(&num, 0);
-    // Sync this logic with `base/util.jl:GC_Diff`
-    *bytes = (num.total_allocd + num.deferred_alloc + num.allocd);
-}
-
-JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void)
-{
-    return gc_num.total_time;
-}
-
-JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
-{
-    jl_gc_num_t num = gc_num;
-    combine_thread_gc_counts(&num, 0);
-    return num;
+    arraylist_free(&copied_list);
 }
 
-JL_DLLEXPORT void jl_gc_reset_stats(void)
+JL_DLLEXPORT void jl_finalize(jl_value_t *o)
 {
-    gc_num.max_pause = 0;
-    gc_num.max_memory = 0;
-    gc_num.max_time_to_safepoint = 0;
+    jl_finalize_th(jl_current_task, o);
 }
 
-// TODO: these were supposed to be thread local
-JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT
-{
-    int64_t oldtb = last_gc_total_bytes;
-    int64_t newtb;
-    jl_gc_get_total_bytes(&newtb);
-    last_gc_total_bytes = newtb;
-    return newtb - oldtb;
-}
+// =========================================================================== //
+// Threading
+// =========================================================================== //
 
-JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT
-{
-    int64_t oldtb = last_gc_total_bytes;
-    int64_t newtb;
-    jl_gc_get_total_bytes(&newtb);
-    last_gc_total_bytes = newtb - offset;
-    return newtb - oldtb;
-}
-
-JL_DLLEXPORT int64_t jl_gc_live_bytes(void)
-{
-    return live_bytes;
-}
+int gc_n_threads;
+jl_ptls_t* gc_all_tls_states;
 
-JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem)
-{
-#ifdef _P32
-    max_mem = max_mem < MAX32HEAP ? max_mem : MAX32HEAP;
-#endif
-    max_total_memory = max_mem;
-}
+// =========================================================================== //
+// Allocation
+// =========================================================================== //
 
-JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
+JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
 {
-    return max_total_memory;
+    return jl_gc_alloc(ptls, sz, ty);
 }
 
-// callback for passing OOM errors from gmp
-JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
+JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
 {
-    jl_throw(jl_memory_exception);
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_alloc(ptls, sz, NULL);
 }
 
 // allocation wrappers that save the size of allocations, to allow using
@@ -767,111 +562,46 @@ JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
     return (void *)(pnew + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
 }
 
-// allocating blocks for Arrays and Strings
-JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    maybe_collect(ptls);
-    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
-    if (allocsz < sz)  // overflow in adding offs, size was "negative"
-        jl_throw(jl_memory_exception);
-
-    int last_errno = errno;
-#ifdef _OS_WINDOWS_
-    DWORD last_error = GetLastError();
-#endif
-    void *b = malloc_cache_align(allocsz);
-    if (b == NULL)
-        jl_throw(jl_memory_exception);
-
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz);
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1);
-    jl_batch_accum_heap_size(ptls, allocsz);
-#ifdef _OS_WINDOWS_
-    SetLastError(last_error);
-#endif
-    errno = last_errno;
-    // jl_gc_managed_malloc is currently always used for allocating array buffers.
-    maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
-    return b;
-}
-
-uv_mutex_t gc_perm_lock;
-
-JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    jl_gc_add_finalizer_th(ptls, v, f);
-}
-
-JL_DLLEXPORT void jl_finalize(jl_value_t *o)
-{
-    jl_finalize_th(jl_current_task, o);
-}
-
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_new_weakref_th(ptls, value);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sz, NULL);
-}
+// allocator entry points
 
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, 0, NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*), NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL);
-}
-
-JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL);
-}
-
-JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void)
-{
-    // TODO: meaningful for MMTk?
-    return GC_MAX_SZCLASS;
-}
-
-JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void)
+JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
 {
-    return sizeof(bigval_t);
+    return jl_gc_alloc_(ptls, sz, ty);
 }
 
+// =========================================================================== //
+// Generic Memory
+// =========================================================================== //
 
-JL_DLLEXPORT void * jl_gc_alloc_typed(jl_ptls_t ptls, size_t sz, void *ty)
+size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
 {
-    return jl_gc_alloc(ptls, sz, ty);
+    const jl_datatype_layout_t *layout = ((jl_datatype_t*)jl_typetagof(m))->layout;
+    size_t sz = layout->size * m->length;
+    if (layout->flags.arrayelem_isunion)
+        // account for isbits Union array selector bytes
+        sz += m->length;
+    return sz;
 }
 
-JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
-{
-    arraylist_push(&ptls->gc_tls.sweep_objs, obj);
+// tracking Memorys with malloc'd storage
+void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
+    // This is **NOT** a GC safe point.
+    mallocmemory_t *ma;
+    if (ptls->gc_tls_common.heap.mafreelist == NULL) {
+        ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t));
+    }
+    else {
+        ma = ptls->gc_tls_common.heap.mafreelist;
+        ptls->gc_tls_common.heap.mafreelist = ma->next;
+    }
+    ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned);
+    ma->next = ptls->gc_tls_common.heap.mallocarrays;
+    ptls->gc_tls_common.heap.mallocarrays = ma;
 }
 
-
-// gc-debug common functions
-// ---
+// =========================================================================== //
+// GC Debug
+// =========================================================================== //
 
 int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
 {
@@ -904,6 +634,77 @@ int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
     return (slot - start) / elsize;
 }
 
+// =========================================================================== //
+// GC Control
+// =========================================================================== //
+
+JL_DLLEXPORT uint32_t jl_get_gc_disable_counter(void) {
+    return jl_atomic_load_acquire(&jl_gc_disable_counter);
+}
+
+JL_DLLEXPORT int jl_gc_is_enabled(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return !ptls->disable_gc;
+}
+
+int gc_logging_enabled = 0;
+
+JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
+    gc_logging_enabled = enable;
+}
+
+JL_DLLEXPORT int jl_is_gc_logging_enabled(void) {
+    return gc_logging_enabled;
+}
+
+
+// collector entry point and control
+_Atomic(uint32_t) jl_gc_disable_counter = 1;
+
+JL_DLLEXPORT int jl_gc_enable(int on)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    int prev = !ptls->disable_gc;
+    ptls->disable_gc = (on == 0);
+    if (on && !prev) {
+        // disable -> enable
+        if (jl_atomic_fetch_add(&jl_gc_disable_counter, -1) == 1) {
+            gc_num.allocd += gc_num.deferred_alloc;
+            gc_num.deferred_alloc = 0;
+        }
+    }
+    else if (prev && !on) {
+        // enable -> disable
+        jl_atomic_fetch_add(&jl_gc_disable_counter, 1);
+        // check if the GC is running and wait for it to finish
+        jl_gc_safepoint_(ptls);
+    }
+    return prev;
+}
+
+// =========================================================================== //
+// MISC
+// =========================================================================== //
+
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    return jl_gc_new_weakref_th(ptls, value);
+}
+
+const uint64_t _jl_buff_tag[3] = {0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull, 0x4eadc0004eadc000ull}; // aka 0xHEADER00
+JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT
+{
+    return jl_buff_tag;
+}
+
+// callback for passing OOM errors from gmp
+JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
+{
+    jl_throw(jl_memory_exception);
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gc-common.h b/src/gc-common.h
new file mode 100644
index 0000000000000..32b7470b13a58
--- /dev/null
+++ b/src/gc-common.h
@@ -0,0 +1,188 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_GC_COMMON_H
+#define JL_GC_COMMON_H
+
+#include "julia.h"
+#include "julia_internal.h"
+#ifndef _OS_WINDOWS_
+#include <sys/mman.h>
+#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// =========================================================================== //
+// GC Callbacks
+// =========================================================================== //
+
+typedef void (*jl_gc_cb_func_t)(void);
+
+typedef struct _jl_gc_callback_list_t {
+    struct _jl_gc_callback_list_t *next;
+    jl_gc_cb_func_t func;
+} jl_gc_callback_list_t;
+
+extern jl_gc_callback_list_t *gc_cblist_root_scanner;
+extern jl_gc_callback_list_t *gc_cblist_task_scanner;
+extern jl_gc_callback_list_t *gc_cblist_pre_gc;
+extern jl_gc_callback_list_t *gc_cblist_post_gc;
+extern jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
+extern jl_gc_callback_list_t *gc_cblist_notify_external_free;
+extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
+
+#define gc_invoke_callbacks(ty, list, args) \
+    do { \
+        for (jl_gc_callback_list_t *cb = list; \
+                cb != NULL; \
+                cb = cb->next) \
+        { \
+            ((ty)(cb->func)) args; \
+        } \
+    } while (0)
+
+#ifdef __cplusplus
+}
+#endif
+
+// =========================================================================== //
+// malloc wrappers, aligned allocation
+// =========================================================================== //
+
+// data structure for tracking malloc'd genericmemory.
+typedef struct _mallocmemory_t {
+    jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
+    struct _mallocmemory_t *next;
+} mallocmemory_t;
+
+#if defined(_OS_WINDOWS_)
+STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
+{
+    return _aligned_malloc(sz ? sz : 1, align);
+}
+STATIC_INLINE void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz,
+                                       size_t align)
+{
+    (void)oldsz;
+    return _aligned_realloc(p, sz ? sz : 1, align);
+}
+STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
+{
+    _aligned_free(p);
+}
+#else
+STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
+{
+#if defined(_P64) || defined(__APPLE__)
+    if (align <= 16)
+        return malloc(sz);
+#endif
+    void *ptr;
+    if (posix_memalign(&ptr, align, sz))
+        return NULL;
+    return ptr;
+}
+STATIC_INLINE void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
+                                       size_t align)
+{
+#if defined(_P64) || defined(__APPLE__)
+    if (align <= 16)
+        return realloc(d, sz);
+#endif
+    void *b = jl_malloc_aligned(sz, align);
+    if (b != NULL) {
+        memcpy(b, d, oldsz > sz ? sz : oldsz);
+        free(d);
+    }
+    return b;
+}
+STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
+{
+    free(p);
+}
+#endif
+#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
+#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
+
+// =========================================================================== //
+// Pointer tagging
+// =========================================================================== //
+
+STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_MARKED) != 0;
+}
+
+STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT
+{
+    return (bits & GC_OLD) != 0;
+}
+
+STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT
+{
+    return (tag & ~(uintptr_t)3) | bits;
+}
+
+STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+{
+    return ((uintptr_t)v) & mask;
+}
+
+STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
+{
+    return (void*)(((uintptr_t)v) & ~mask);
+}
+
+// =========================================================================== //
+// GC Metrics
+// =========================================================================== //
+
+extern jl_gc_num_t gc_num;
+
+// =========================================================================== //
+// Stop-the-world for GC
+// =========================================================================== //
+void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads);
+
+// =========================================================================== //
+// Finalization
+// =========================================================================== //
+
+// Protect all access to `finalizer_list_marked` and `to_finalize`.
+// For accessing `ptls->finalizers`, the lock is needed if a thread
+// is going to realloc the buffer (of its own list) or accessing the
+// list of another thread
+extern jl_mutex_t finalizers_lock;
+// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
+// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
+// If an object pointer has the second lowest bit set, the current pointer is a c object pointer.
+//   It must be aligned at least 4, and it finalized immediately (at "quiescence").
+// `to_finalize` should not have tagged pointers.
+extern arraylist_t finalizer_list_marked;
+extern arraylist_t to_finalize;
+
+void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT;
+void run_finalizer(jl_task_t *ct, void *o, void *ff);
+void run_finalizers(jl_task_t *ct, int finalizers_thread);
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o);
+
+
+// =========================================================================== //
+// Threading
+// =========================================================================== //
+
+extern int gc_n_threads;
+extern jl_ptls_t* gc_all_tls_states;
+
+// =========================================================================== //
+// Logging
+// =========================================================================== //
+
+extern int gc_logging_enabled;
+
+#endif // JL_GC_COMMON_H
diff --git a/src/gc-debug.c b/src/gc-debug.c
index 19348b380e145..2c8e1c6055414 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -1,8 +1,8 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #ifndef MMTK_GC
-
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
 #include "julia.h"
 #include <inttypes.h>
 #include <stddef.h>
@@ -136,7 +136,7 @@ static void clear_mark(int bits)
     }
     bigval_t *v;
     for (int i = 0; i < gc_n_threads; i++) {
-        v = gc_all_tls_states[i]->gc_tls.heap.big_objects;
+        v = gc_all_tls_states[i]->gc_tls.heap.young_generation_of_bigvals;
         while (v != NULL) {
             void *gcv = &v->header;
             if (!gc_verifying)
@@ -146,7 +146,7 @@ static void clear_mark(int bits)
         }
     }
 
-    v = big_objects_marked;
+    v = oldest_generation_of_bigvals;
     while (v != NULL) {
         void *gcv = &v->header;
         if (!gc_verifying)
@@ -538,13 +538,13 @@ static void gc_scrub_task(jl_task_t *ta)
 
     char *low;
     char *high;
-    if (ta->copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
+    if (ta->ctx.copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
         low  = (char*)ptls2->stackbase - ptls2->stacksize;
         high = (char*)ptls2->stackbase;
     }
-    else if (ta->stkbuf) {
-        low  = (char*)ta->stkbuf;
-        high = (char*)ta->stkbuf + ta->bufsz;
+    else if (ta->ctx.stkbuf) {
+        low  = (char*)ta->ctx.stkbuf;
+        high = (char*)ta->ctx.stkbuf + ta->ctx.bufsz;
     }
     else
         return;
@@ -919,113 +919,138 @@ void jl_gc_debug_init(void)
 // GC summary stats
 
 #ifdef MEMPROFILE
-// TODO repair this and possibly merge with `gc_count_pool`
-static size_t pool_stats(jl_gc_pool_t *p, size_t *pwaste, size_t *np,
-                         size_t *pnold)
+
+typedef struct _gc_memprofile_stat_t {
+    size_t nfree; // for pool only
+    size_t npgs;  // for pool only
+    size_t nused;
+    size_t nbytes_used;
+    size_t nused_old;
+    size_t nbytes_used_old;
+} gc_memprofile_stat_t;
+
+void gc_stats_all_pool(void)
 {
-    jl_taggedvalue_t *halfpages = p->newpages;
-    size_t osize = p->osize;
-    size_t nused=0, nfree=0, npgs=0, nold=0;
-
-    if (halfpages != NULL) {
-        npgs++;
-        char *v = gc_page_data(halfpages) + GC_PAGE_OFFSET;
-        char *lim = (char*)halfpages - 1;
-        int i = 0;
-        while (v <= lim) {
-            if (!gc_marked(((jl_taggedvalue_t*)v)->bits.gc)) {
-                nfree++;
+    gc_memprofile_stat_t stat[JL_GC_N_POOLS];
+    memset(stat, 0, sizeof(stat));
+    for (int t_i = 0; t_i < gc_n_threads; t_i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[t_i];
+        if (ptls2 == NULL) {
+            continue;
+        }
+        jl_gc_page_stack_t *pgstk = &ptls2->gc_tls.page_metadata_allocd;
+        jl_gc_pagemeta_t *pg = jl_atomic_load_relaxed(&pgstk->bottom);
+        while (pg != NULL) {
+            assert(gc_alloc_map_is_set(pg->data));
+            int pool_n = pg->pool_n;
+            jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[pool_n];
+            char *data = pg->data;
+            // compute the start of the data area in this page
+            jl_taggedvalue_t *v0 = (jl_taggedvalue_t*)(data + GC_PAGE_OFFSET);
+            // compute the limit of valid data in this page
+            char *lim = data + GC_PAGE_SZ - pg->osize;
+            char *lim_newpages = data + GC_PAGE_SZ;
+            if (gc_page_data((char*)p->newpages - 1) == data) {
+                lim_newpages = (char*)p->newpages;
             }
-            else {
-                nused++;
-                if (((jl_taggedvalue_t*)v)->bits.gc == GC_OLD_MARKED) {
-                    nold++;
+            char *v = (char*)v0;
+            gc_memprofile_stat_t *stat_n = &stat[pool_n];
+            while (v <= lim) {
+                uint8_t bits = ((jl_taggedvalue_t*)v)->bits.gc;
+                if (!gc_marked(bits) || (char*)v >= lim_newpages) {
+                    stat_n->nfree++;
+                }
+                else {
+                    if (gc_old(bits)) {
+                        assert(bits == GC_OLD_MARKED);
+                        stat_n->nused_old++;
+                        stat_n->nbytes_used_old += pg->osize;
+                    }
+                    else {
+                        stat_n->nused++;
+                        stat_n->nbytes_used += pg->osize;
+                    }
                 }
+                v = v + pg->osize;
             }
-            v = v + osize;
-            i++;
+            stat_n->npgs++;
+            pg = pg->next;
         }
-        // only the first page is allocated on
-    }
-    *pwaste = npgs * GC_PAGE_SZ - (nused * p->osize);
-    *np = npgs;
-    *pnold = nold;
-    if (npgs != 0) {
-        jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n",
-                       p->osize,
-                       (long long)nused,
-                       (long long)(nused + nfree),
-                       (long long)(nused ? (nold * 100) / nused : 0),
-                       (long long)npgs,
-                       (long long)((nused * p->osize) / 1024),
-                       (long long)(*pwaste / 1024));
     }
-    return nused*p->osize;
-}
-
-void gc_stats_all_pool(void)
-{
-    size_t nb=0, w, tw=0, no=0, tp=0, nold=0, noldbytes=0, np, nol;
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
-        for (int t_i = 0; t_i < gc_n_threads; t_i++) {
-            jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-            size_t b = pool_stats(&ptls2->gc_tls.heap.norm_pools[i], &w, &np, &nol);
-            nb += b;
-            no += (b / ptls2->gc_tls.heap.norm_pools[i].osize);
-            tw += w;
-            tp += np;
-            nold += nol;
-            noldbytes += nol * ptls2->gc_tls.heap.norm_pools[i].osize;
-        }
+        jl_ptls_t ptls = jl_current_task->ptls;
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[i];
+        gc_memprofile_stat_t *s = &stat[i];
+        jl_safe_printf("%4d : %7lld/%7lld objects (%3lld%% old), %5lld pages, %5lld kB, %5lld kB waste\n",
+            p->osize,
+            (long long)(s->nused + s->nused_old),
+            (long long)(s->nused + s->nused_old + s->nfree),
+            (long long)((s->nused + s->nused_old) ? (s->nused_old * 100) / (s->nused + s->nused_old) : 0),
+            (long long)s->npgs,
+            (long long)(((s->nused + s->nused_old) * p->osize) / 1024),
+            (long long)((GC_PAGE_SZ * s->npgs - s->nused * p->osize) / 1024));
     }
-    jl_safe_printf("%lld objects (%lld%% old), %lld kB (%lld%% old) total allocated, "
-                   "%lld total fragments (%lld%% overhead), in %lld pages\n",
-                   (long long)no,
-                   (long long)(no ? (nold * 100) / no : 0),
-                   (long long)(nb / 1024),
-                   (long long)(nb ? (noldbytes * 100) / nb : 0),
-                   (long long)tw,
-                   (long long)(nb ? (tw * 100) / nb : 0),
-                   (long long)tp);
 }
 
 void gc_stats_big_obj(void)
 {
-    size_t nused=0, nbytes=0, nused_old=0, nbytes_old=0;
+    gc_memprofile_stat_t stat;
+    memset(&stat, 0, sizeof(stat));
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
-        bigval_t *v = ptls2->gc_tls.heap.big_objects;
+        if (ptls2 == NULL) {
+            continue;
+        }
+        bigval_t *v = ptls2->gc_tls.heap.young_generation_of_bigvals;
+        v = v->next; // skip the sentinel
         while (v != NULL) {
             if (gc_marked(v->bits.gc)) {
-                nused++;
-                nbytes += v->sz & ~3;
+                if (gc_old(v->bits.gc)) {
+                    assert(v->bits.gc == GC_OLD_MARKED);
+                    stat.nused_old++;
+                    stat.nbytes_used_old += v->sz;
+                }
+                else {
+                    stat.nused++;
+                    stat.nbytes_used += v->sz;
+                }
             }
             v = v->next;
         }
-        v = big_objects_marked;
+        v = oldest_generation_of_bigvals;
+        v = v->next; // skip the sentinel
         while (v != NULL) {
-            if (gc_marked(v->bits.gc)) {
-                nused_old++;
-                nbytes_old += v->sz & ~3;
-            }
+            assert(v->bits.gc == GC_OLD_MARKED);
+            stat.nused_old++;
+            stat.nbytes_used_old += v->sz;
             v = v->next;
         }
 
-        mallocarray_t *ma = ptls2->gc_tls.heap.mallocarrays;
+        mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays;
         while (ma != NULL) {
-            if (gc_marked(jl_astaggedvalue(ma->a)->bits.gc)) {
-                nused++;
-                nbytes += jl_genericmemory_nbytes((jl_genericmemory_t*)ma->a);
+            uint8_t bits =jl_astaggedvalue(ma->a)->bits.gc;
+            if (gc_marked(bits)) {
+                jl_genericmemory_t *m = (jl_genericmemory_t*)ma->a;
+                m = (jl_genericmemory_t*)((uintptr_t)m & ~(uintptr_t)1);
+                size_t sz = jl_genericmemory_nbytes(m);
+                if (gc_old(bits)) {
+                    assert(bits == GC_OLD_MARKED);
+                    stat.nused_old++;
+                    stat.nbytes_used_old += sz;
+                }
+                else {
+                    stat.nused++;
+                    stat.nbytes_used += sz;
+                }
             }
             ma = ma->next;
         }
     }
-
     jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n",
-                   (long long)((nbytes + nbytes_old) / 1024),
-                   (long long)(nbytes + nbytes_old ? (nbytes_old * 100) / (nbytes + nbytes_old) : 0),
-                   (long long)(nused + nused_old),
-                   (long long)(nused + nused_old ? (nused_old * 100) / (nused + nused_old) : 0));
+                   (long long)((stat.nbytes_used + stat.nbytes_used_old) / 1024),
+                   (long long)(stat.nbytes_used + stat.nbytes_used_old ? (stat.nbytes_used_old * 100) / (stat.nbytes_used + stat.nbytes_used_old) : 0),
+                   (long long)(stat.nused + stat.nused_old),
+                   (long long)(stat.nused + stat.nused_old ? (stat.nused_old * 100) / (stat.nused + stat.nused_old) : 0));
 }
 #endif //MEMPROFILE
 
@@ -1081,67 +1106,6 @@ void gc_count_pool(void)
     jl_safe_printf("************************\n");
 }
 
-// Print a backtrace from the `mq->start` of the mark queue up to `mq->current`
-// `offset` will be added to `mq->current` for convenience in the debugger.
-NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_markqueue_t *mq, int offset)
-{
-    // jl_jmp_buf *old_buf = jl_get_safe_restore();
-    // jl_jmp_buf buf;
-    // jl_set_safe_restore(&buf);
-    // if (jl_setjmp(buf, 0) != 0) {
-    //     jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n");
-    //     jl_set_safe_restore(old_buf);
-    //     return;
-    // }
-    // jl_value_t **start = mq->start;
-    // jl_value_t **end = mq->current + offset;
-    // for (; start < end; start++) {
-    //     jl_value_t *obj = *start;
-    //     jl_taggedvalue_t *o = jl_astaggedvalue(obj);
-    //     jl_safe_printf("Queued object: %p :: (tag: %zu) (bits: %zu)\n", obj,
-    //                    (uintptr_t)o->header, ((uintptr_t)o->header & 3));
-    //     jl_((void*)(jl_datatype_t *)(o->header & ~(uintptr_t)0xf));
-    // }
-    // jl_set_safe_restore(old_buf);
-}
-
-// int gc_slot_to_fieldidx(void *obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT
-// {
-//     int nf = (int)jl_datatype_nfields(vt);
-//     for (int i = 1; i < nf; i++) {
-//         if (slot < (void*)((char*)obj + jl_field_offset(vt, i)))
-//             return i - 1;
-//     }
-//     return nf - 1;
-// }
-
-// int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT
-// {
-//     char *slot = (char*)_slot;
-//     jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
-//     char *start = NULL;
-//     size_t len = 0;
-//     size_t elsize = sizeof(void*);
-//     if (vt == jl_module_type) {
-//         jl_module_t *m = (jl_module_t*)obj;
-//         start = (char*)m->usings.items;
-//         len = m->usings.len;
-//     }
-//     else if (vt == jl_simplevector_type) {
-//         start = (char*)jl_svec_data(obj);
-//         len = jl_svec_len(obj);
-//     }
-//     else if (vt->name == jl_array_typename) {
-//         jl_array_t *a = (jl_array_t*)obj;
-//         start = (char*)a->data;
-//         len = jl_array_len(a);
-//         elsize = a->elsize;
-//     }
-//     if (slot < start || slot >= start + elsize * len)
-//         return -1;
-//     return (slot - start) / elsize;
-// }
-
 void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect, int64_t live_bytes) JL_NOTSAFEPOINT {
     if (!gc_logging_enabled) {
         return;
@@ -1167,4 +1131,4 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect
 }
 #endif
 
-#endif  // !MMTK_GC
+#endif // !MMTK_GC
diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp
index 77a6e70a127e6..72eb17115f4c7 100644
--- a/src/gc-heap-snapshot.cpp
+++ b/src/gc-heap-snapshot.cpp
@@ -2,9 +2,9 @@
 
 #include "gc-heap-snapshot.h"
 
+#include "julia.h"
 #include "julia_internal.h"
 #include "julia_assert.h"
-#include "gc.h"
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
@@ -182,8 +182,10 @@ struct HeapSnapshot {
 // global heap snapshot, mutated by garbage collector
 // when snapshotting is on.
 int gc_heap_snapshot_enabled = 0;
+int gc_heap_snapshot_redact_data = 0;
 HeapSnapshot *g_snapshot = nullptr;
-extern jl_mutex_t heapsnapshot_lock;
+// mutex for gc-heap-snapshot.
+jl_mutex_t heapsnapshot_lock;
 
 void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one);
 void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one);
@@ -194,7 +196,7 @@ void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT;
 
 
 JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
-    ios_t *strings, ios_t *json, char all_one)
+    ios_t *strings, ios_t *json, char all_one, char redact_data)
 {
     HeapSnapshot snapshot;
     snapshot.nodes = nodes;
@@ -206,6 +208,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
 
     // Enable snapshotting
     g_snapshot = &snapshot;
+    gc_heap_snapshot_redact_data = redact_data;
     gc_heap_snapshot_enabled = true;
 
     _add_synthetic_root_entries(&snapshot);
@@ -215,6 +218,7 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
 
     // Disable snapshotting
     gc_heap_snapshot_enabled = false;
+    gc_heap_snapshot_redact_data = 0;
     g_snapshot = nullptr;
 
     jl_mutex_unlock(&heapsnapshot_lock);
@@ -327,7 +331,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
 
     if (jl_is_string(a)) {
         node_type = "String";
-        name = jl_string_data(a);
+        name = gc_heap_snapshot_redact_data ? "<redacted>" : jl_string_data(a);
         self_size = jl_string_len(a);
     }
     else if (jl_is_symbol(a)) {
@@ -557,6 +561,13 @@ void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *
                     g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "<internal>"));
 }
 
+void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
+{
+    _record_gc_edge("binding", from, to,
+                    g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "<binding>"));
+}
+
+
 void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT
 {
     // valid alloc_type values are 0, 1, 2
@@ -607,38 +618,32 @@ void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &sn
 {
     // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567
     // also https://github.com/microsoft/vscode-v8-heap-tools/blob/c5b34396392397925ecbb4ecb904a27a2754f2c1/v8-heap-parser/src/decoder.rs#L43-L51
-    ios_printf(json, "{\"snapshot\":{");
+    ios_printf(json, "{\"snapshot\":{\n");
 
-    ios_printf(json, "\"meta\":{");
-    ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],");
-    ios_printf(json, "\"node_types\":[");
+    ios_printf(json, "  \"meta\":{\n");
+    ios_printf(json, "    \"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],\n");
+    ios_printf(json, "    \"node_types\":[");
     snapshot.node_types.print_json_array(json, false);
     ios_printf(json, ",");
-    ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],");
-    ios_printf(json, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],");
-    ios_printf(json, "\"edge_types\":[");
+    ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],\n");
+    ios_printf(json, "    \"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],\n");
+    ios_printf(json, "    \"edge_types\":[");
     snapshot.edge_types.print_json_array(json, false);
     ios_printf(json, ",");
-    ios_printf(json, "\"string_or_number\",\"from_node\"],");
+    ios_printf(json, "\"string_or_number\",\"from_node\"],\n");
     // not used. Required by microsoft/vscode-v8-heap-tools
-    ios_printf(json, "\"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],");
-    ios_printf(json, "\"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],");
-    ios_printf(json, "\"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],");
-    ios_printf(json, "\"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]");
+    ios_printf(json, "    \"trace_function_info_fields\":[\"function_id\",\"name\",\"script_name\",\"script_id\",\"line\",\"column\"],\n");
+    ios_printf(json, "    \"trace_node_fields\":[\"id\",\"function_info_index\",\"count\",\"size\",\"children\"],\n");
+    ios_printf(json, "    \"sample_fields\":[\"timestamp_us\",\"last_assigned_id\"],\n");
+    ios_printf(json, "    \"location_fields\":[\"object_index\",\"script_id\",\"line\",\"column\"]\n");
     // end not used
-    ios_printf(json, "},\n"); // end "meta"
+    ios_printf(json, "  },\n"); // end "meta"
 
-    ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes);
-    ios_printf(json, "\"edge_count\":%zu,", snapshot.num_edges);
-    ios_printf(json, "\"trace_function_count\":0"); // not used. Required by microsoft/vscode-v8-heap-tools
-    ios_printf(json, "},\n"); // end "snapshot"
-
-    // not used. Required by microsoft/vscode-v8-heap-tools
-    ios_printf(json, "\"trace_function_infos\":[],");
-    ios_printf(json, "\"trace_tree\":[],");
-    ios_printf(json, "\"samples\":[],");
-    ios_printf(json, "\"locations\":[]");
-    // end not used
+    ios_printf(json, "  \"node_count\":%zu,\n", snapshot.num_nodes);
+    ios_printf(json, "  \"edge_count\":%zu,\n", snapshot.num_edges);
+    ios_printf(json, "  \"trace_function_count\":0\n"); // not used. Required by microsoft/vscode-v8-heap-tools
+    ios_printf(json, "}\n"); // end "snapshot"
 
+    // this } is removed by the julia reassembler in Profile
     ios_printf(json, "}");
 }
diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h
index 70884f5f62d6a..dc5b22bb72eb1 100644
--- a/src/gc-heap-snapshot.h
+++ b/src/gc-heap-snapshot.h
@@ -32,9 +32,12 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t byt
 void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT;
 // Used for objects that are reachable from the finalizer list
 void _gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT;
+// Used for objects reachable from the binding partition pointer union
+void _gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT;
 
 extern int gc_heap_snapshot_enabled;
 extern int prev_sweep_full;
+extern jl_mutex_t heapsnapshot_lock;
 
 int gc_slot_to_fieldidx(void *_obj, void *slot, jl_datatype_t *vt) JL_NOTSAFEPOINT;
 int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT;
@@ -96,6 +99,13 @@ static inline void gc_heap_snapshot_record_internal_array_edge(jl_value_t *from,
     }
 }
 
+static inline void gc_heap_snapshot_record_binding_partition_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT
+{
+    if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
+        _gc_heap_snapshot_record_binding_partition_edge(from, to);
+    }
+}
+
 static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT
 {
     if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
@@ -121,7 +131,7 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i
 // Functions to call from Julia to take heap snapshot
 // ---------------------------------------------------------------------
 JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges,
-    ios_t *strings, ios_t *json, char all_one);
+    ios_t *strings, ios_t *json, char all_one, char redact_data);
 
 
 #ifdef __cplusplus
diff --git a/src/gc-interface.h b/src/gc-interface.h
new file mode 100644
index 0000000000000..176efc81b7ca7
--- /dev/null
+++ b/src/gc-interface.h
@@ -0,0 +1,249 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+  Garbage Collection interface that must be implemented by third-party GCs
+*/
+
+#ifndef JL_GC_INTERFACE_H
+#define JL_GC_INTERFACE_H
+
+#include "dtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _jl_tls_states_t;
+struct _jl_value_t;
+struct _jl_weakref_t;
+struct _jl_datatype_t;
+
+// ========================================================================= //
+// GC Metrics
+// ========================================================================= //
+
+// This struct must be kept in sync with the Julia type of the same name in base/timing.jl
+typedef struct {
+    int64_t allocd;
+    int64_t deferred_alloc;
+    int64_t freed;
+    uint64_t malloc;
+    uint64_t realloc;
+    uint64_t poolalloc;
+    uint64_t bigalloc;
+    uint64_t freecall;
+    uint64_t total_time;
+    uint64_t total_allocd;
+    size_t interval;
+    int pause;
+    int full_sweep;
+    uint64_t max_pause;
+    uint64_t max_memory;
+    uint64_t time_to_safepoint;
+    uint64_t max_time_to_safepoint;
+    uint64_t total_time_to_safepoint;
+    uint64_t sweep_time;
+    uint64_t mark_time;
+    uint64_t total_sweep_time;
+    uint64_t total_mark_time;
+    uint64_t last_full_sweep;
+    uint64_t last_incremental_sweep;
+} jl_gc_num_t;
+
+// ========================================================================= //
+// System-wide Initialization
+// ========================================================================= //
+
+// System-wide initialization function. Responsible for initializing global locks as well as
+// global memory parameters (e.g. target heap size) used by the collector.
+void jl_gc_init(void);
+// Spawns GC threads.
+void jl_start_gc_threads(void);
+
+// ========================================================================= //
+// Per-thread Initialization
+// ========================================================================= //
+
+// Initializes thread-local data structures such as thread-local object pools,
+// thread-local remembered sets and thread-local allocation counters.
+// Should be called exactly once per Julia thread.
+void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT;
+// Deallocates any memory previously used for thread-local GC data structures.
+// Mostly used to ensure that we perform this memory cleanup for foreign threads that are
+// about to leave Julia.
+void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls);
+
+// ========================================================================= //
+// Controls
+// ========================================================================= //
+
+typedef enum {
+    JL_GC_AUTO = 0, // use heuristics to determine the collection type
+    JL_GC_FULL = 1, // force a full collection
+    JL_GC_INCREMENTAL = 2, // force an incremental collection
+} jl_gc_collection_t;
+// Enables or disables (depending on the value of the argument) the collector. Returns
+// whether GC was previously enabled.
+JL_DLLEXPORT int jl_gc_enable(int on);
+// Returns whether the collector is enabled.
+JL_DLLEXPORT int jl_gc_is_enabled(void);
+// Sets a soft limit to Julia's heap.
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
+// Runs a GC cycle. This function's parameter determines whether we're running an
+// incremental, full, or automatic (i.e. heuristic driven) collection. Returns whether we
+// should run a collection cycle again (e.g. a full mark right after a full sweep to ensure
+// we do a full heap traversal).
+JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection);
+// Returns whether the thread with `tid` is a collector thread
+JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT;
+// Pinning objects; Returns whether the object has been pinned by this call.
+JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj);
+
+// ========================================================================= //
+// Metrics
+// ========================================================================= //
+
+// Retrieves Julia's `GC_Num` (structure that stores GC statistics).
+JL_DLLEXPORT jl_gc_num_t jl_gc_num(void);
+// Returns the difference between the current value of total live bytes now
+// (live bytes at the last collection plus number of bytes allocated since then),
+// compared to the value at the last time this function was called.
+JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
+// Returns the difference between the current value of total live bytes now
+// (live bytes at the last collection plus number of bytes allocated since then)
+// compared to the value at the last time this function was called. The offset parameter
+// is subtracted from this value in order to obtain the return value.
+JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
+// Returns the number of pool allocated bytes. This could always return 0 for GC
+// implementations that do not use pools.
+JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void);
+// Returns the number of live bytes at the end of the last collection cycle
+// (doesn't include the number of allocated bytes since then).
+JL_DLLEXPORT int64_t jl_gc_live_bytes(void);
+// Stores the number of live bytes at the end of the last collection cycle plus the number
+// of bytes we allocated since then into the 64-bit integer pointer passed as an argument.
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT;
+// Retrieves the value of Julia's soft heap limit.
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void);
+// High-resolution (nano-seconds) value of total time spent in GC.
+JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void);
+
+// ========================================================================= //
+// Allocation
+// ========================================================================= //
+
+// On GCC, this function is inlined when sz is constant (see julia_internal.h)
+// In general, this function should implement allocation and should use the specific GC's logic
+// to decide whether to allocate a small or a large object. Finally, note that this function
+// **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record
+// an allocation of that type in the allocation profiler.
+struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty);
+
+// Allocates small objects and increments Julia allocation counterst. Size of the object
+// header must be included in the object size. The (possibly unused in some implementations)
+// offset to the arena in which we're allocating is passed in the second parameter, and the
+// object size in the third parameter. If thread-local allocators are used, then this
+// function should allocate in the thread-local allocator of the thread referenced by the
+// jl_ptls_t argument. An additional (last) parameter containing information about the type
+// of the object being allocated may be used to record an allocation of that type in the
+// allocation profiler.
+JL_DLLEXPORT struct _jl_value_t *jl_gc_small_alloc(struct _jl_tls_states_t *ptls,
+                                                   int offset, int osize,
+                                                   struct _jl_value_t *type);
+// Description: Allocates large objects and increments Julia allocation counters. Size of
+// the object header must be included in the object size. If thread-local allocators are
+// used, then this function should allocate in the thread-local allocator of the thread
+// referenced by the jl_ptls_t argument. An additional (last) parameter containing
+// information about the type of the object being allocated may be used to record an
+// allocation of that type in the allocation profiler.
+JL_DLLEXPORT struct _jl_value_t *jl_gc_big_alloc(struct _jl_tls_states_t *ptls, size_t sz,
+                                                 struct _jl_value_t *type);
+// Wrapper around Libc malloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
+// Wrapper around Libc calloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz);
+// Wrapper around Libc free that updates Julia allocation counters.
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz);
+// Wrapper around Libc realloc that updates Julia allocation counters.
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz);
+// Wrapper around Libc malloc that's used to dynamically allocate memory for Arrays and
+// Strings. It increments Julia allocation counters and should check whether we're close to
+// the Julia heap target, and therefore, whether we should run a collection. Note that this
+// doesn't record the size of the allocation request in a side metadata (i.e. a few words in
+// front of the memory payload): this function is used for Julia object allocations, and we
+// assume that there is already a field in the Julia object being allocated that we may use
+// to store the size of the memory buffer.
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
+// Allocates a new weak-reference, assigns its value and increments Julia allocation
+// counters. If thread-local allocators are used, then this function should allocate in the
+// thread-local allocator of the thread referenced by the first jl_ptls_t argument.
+JL_DLLEXPORT struct _jl_weakref_t *jl_gc_new_weakref_th(struct _jl_tls_states_t *ptls,
+                                                        struct _jl_value_t *value);
+// Permanently allocates a memory slot of the size specified by the first parameter. This
+// block of memory is allocated in an immortal region that is never swept. The second
+// parameter specifies whether the memory should be filled with zeros. The third and fourth
+// parameters specify the alignment and an offset in bytes, respectively. Specifically, the
+// pointer obtained by advancing the result of this function by the number of bytes
+// specified in the fourth parameter will be aligned according to the value given by the
+// third parameter in bytes.
+JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align,
+                                    unsigned offset) JL_NOTSAFEPOINT;
+// Permanently allocates an object of the size specified by the first parameter. Size of the
+// object header must be included in the object size. This object is allocated in an
+// immortal region that is never swept. The second parameter specifies the type of the
+// object being allocated and will be used to set the object header.
+struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT;
+
+// This function notifies the GC about memory addresses that are set when loading the boot image.
+// The GC may use that information to, for instance, determine that all objects in that chunk of memory should
+// be treated as marked and belonged to the old generation in nursery collections.
+void jl_gc_notify_image_load(const char* img_data, size_t len);
+
+// This function notifies the GC about memory addresses that are set when allocating the boot image.
+// The GC may use that information to, for instance, determine that all objects in that chunk of memory should
+// be treated as marked and belonged to the old generation in nursery collections.
+void jl_gc_notify_image_alloc(char* img_data, size_t len);
+
+// ========================================================================= //
+// Runtime Write-Barriers
+// ========================================================================= //
+
+// Write barrier slow-path. If a generational collector is used,
+// it may enqueue an old object into the remembered set of the calling thread.
+JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
+// In a generational collector is used, this function walks over the fields of the
+// object specified by the second parameter (as defined by the data type in the third
+// parameter). If a field points to a young object, the first parameter is enqueued into the
+// remembered set of the calling thread.
+JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
+                                        struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;
+// If a generational collector is used, checks whether the function argument points to an
+// old object, and if so, calls the write barrier slow path above. In most cases, this
+// function is used when its caller has verified that there is a young reference in the
+// object that's being passed as an argument to this function.
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT;
+// Write barrier function that must be used after pointer writes to heap-allocated objects –
+// the value of the field being written must also point to a heap-allocated object.
+// If a generational collector is used, it may check whether the two function arguments are
+// in different GC generations (i.e. if the first argument points to an old object and the
+// second argument points to a young object), and if so, call the write barrier slow-path.
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT;
+// Freshly allocated objects are known to be in the young generation until the next safepoint,
+// so write barriers can be omitted until the next allocation. This function is a no-op that
+// can be used to annotate that a write barrier would be required were it not for this property
+// (as opposed to somebody just having forgotten to think about write barriers).
+STATIC_INLINE void jl_gc_wb_fresh(const void *parent, const void *ptr) JL_NOTSAFEPOINT {}
+// Used to annotate that a write barrier would be required, but may be omitted because `ptr`
+// is known to be an old object.
+STATIC_INLINE void jl_gc_wb_knownold(const void *parent, const void *ptr) JL_NOTSAFEPOINT {}
+// Write-barrier function that must be used after copying multiple fields of an object into
+// another. It should be semantically equivalent to triggering multiple write barriers – one
+// per field of the object being copied, but may be special-cased for performance reasons.
+STATIC_INLINE void jl_gc_multi_wb(const void *parent,
+                                  const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c
new file mode 100644
index 0000000000000..f4a44471f37f8
--- /dev/null
+++ b/src/gc-mmtk.c
@@ -0,0 +1,774 @@
+#ifdef MMTK_GC
+
+#include "mmtk_julia.h"
+#include "gc-common.h"
+#include "mmtkMutator.h"
+#include "gc-mmtk.h"
+#include "threading.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// FIXME: Does it make sense for MMTk to implement something similar
+// for now, just ignoring this.
+
+// Must be kept in sync with `base/timing.jl`
+#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0)
+#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1)
+#define FULL_SWEEP_REASON_USER_MAX_EXCEEDED (2)
+#define FULL_SWEEP_REASON_LARGE_PROMOTION_RATE (3)
+#define FULL_SWEEP_NUM_REASONS (4)
+
+// Table recording number of full GCs due to each reason
+JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
+
+// FIXME: Should the values below be shared between both GC's?
+// Note that MMTk uses a hard max heap limit, which is set by default
+// as 70% of the free available memory. The min heap is set as the
+// default_collect_interval variable below.
+
+// max_total_memory is a suggestion.  We try very hard to stay
+// under this limit, but we will go above it rather than halting.
+#ifdef _P64
+typedef uint64_t memsize_t;
+static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*);
+// We expose this to the user/ci as jl_gc_set_max_memory
+static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024;
+#else
+typedef uint32_t memsize_t;
+static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*);
+// Work really hard to stay within 2GB
+// Alternative is to risk running out of address space
+// on 32 bit architectures.
+#define MAX32HEAP 1536 * 1024 * 1024
+static memsize_t max_total_memory = (memsize_t) MAX32HEAP;
+#endif
+
+void jl_gc_init(void) {
+    // TODO: use jl_options.heap_size_hint to set MMTk's fixed heap size? (see issue: https://github.com/mmtk/mmtk-julia/issues/167)
+    JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
+
+    arraylist_new(&to_finalize, 0);
+    arraylist_new(&finalizer_list_marked, 0);
+
+    gc_num.allocd = 0;
+    gc_num.max_pause = 0;
+    gc_num.max_memory = 0;
+
+    long long min_heap_size;
+    long long max_heap_size;
+    char* min_size_def = getenv("MMTK_MIN_HSIZE");
+    char* min_size_gb = getenv("MMTK_MIN_HSIZE_G");
+
+    char* max_size_def = getenv("MMTK_MAX_HSIZE");
+    char* max_size_gb = getenv("MMTK_MAX_HSIZE_G");
+
+    // default min heap currently set as Julia's default_collect_interval
+    if (min_size_def != NULL) {
+        char *p;
+        double min_size = strtod(min_size_def, &p);
+        min_heap_size = (long) 1024 * 1024 * min_size;
+    } else if (min_size_gb != NULL) {
+        char *p;
+        double min_size = strtod(min_size_gb, &p);
+        min_heap_size = (long) 1024 * 1024 * 1024 * min_size;
+    } else {
+        min_heap_size = default_collect_interval;
+    }
+
+    // default max heap currently set as 70% the free memory in the system
+    if (max_size_def != NULL) {
+        char *p;
+        double max_size = strtod(max_size_def, &p);
+        max_heap_size = (long) 1024 * 1024 * max_size;
+    } else if (max_size_gb != NULL) {
+        char *p;
+        double max_size = strtod(max_size_gb, &p);
+        max_heap_size = (long) 1024 * 1024 * 1024 * max_size;
+    } else {
+        max_heap_size = uv_get_free_memory() * 70 / 100;
+    }
+
+    // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads
+    assert(jl_n_gcthreads == 0);
+
+    // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined
+    int copy_stacks;
+
+#ifdef COPY_STACKS
+    copy_stacks = 1;
+#else
+    copy_stacks = 0;
+#endif
+
+    mmtk_julia_copy_stack_check(copy_stacks);
+
+    // if only max size is specified initialize MMTk with a fixed size heap
+    // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads.
+    // If the two values are the same, we can use either. Otherwise, we need to be careful.
+    uintptr_t gcthreads = jl_options.nmarkthreads;
+    if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) {
+        mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
+    } else {
+        mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
+    }
+}
+
+void jl_start_gc_threads(void) {
+    jl_ptls_t ptls = jl_current_task->ptls;
+    mmtk_initialize_collection((void *)ptls);
+}
+
+void jl_init_thread_heap(struct _jl_tls_states_t *ptls) JL_NOTSAFEPOINT {
+    jl_thread_heap_common_t *heap = &ptls->gc_tls_common.heap;
+    small_arraylist_new(&heap->weak_refs, 0);
+    small_arraylist_new(&heap->live_tasks, 0);
+    for (int i = 0; i < JL_N_STACK_POOLS; i++)
+        small_arraylist_new(&heap->free_stacks[i], 0);
+    heap->mallocarrays = NULL;
+    heap->mafreelist = NULL;
+    arraylist_new(&ptls->finalizers, 0);
+    // Clear the malloc sz count
+    jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
+    // Create mutator
+    MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid);
+    // Copy the mutator to the thread local storage
+    memcpy(&ptls->gc_tls.mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext));
+    // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed)
+    mmtk_post_bind_mutator(&ptls->gc_tls.mmtk_mutator, mmtk_mutator);
+    memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num));
+}
+
+void jl_free_thread_gc_state(struct _jl_tls_states_t *ptls) {
+    mmtk_destroy_mutator(&ptls->gc_tls.mmtk_mutator);
+}
+
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem) {
+    // MMTk currently does not allow setting the heap size at runtime
+}
+
+
+inline void maybe_collect(jl_ptls_t ptls)
+{
+    // Just do a safe point for general maybe_collect
+    jl_gc_safepoint_(ptls);
+}
+
+// This is only used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll),
+// is expensive. So we only check for every few allocations.
+static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz)
+{
+    // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to
+    // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage
+    // as much as we can.
+    if (ptls->gc_tls.malloc_sz_since_last_poll > 4096) {
+        jl_atomic_store_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, 0);
+        mmtk_gc_poll(ptls);
+    } else {
+        jl_atomic_fetch_add_relaxed(&ptls->gc_tls.malloc_sz_since_last_poll, sz);
+        jl_gc_safepoint_(ptls);
+    }
+}
+
+JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) {
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+        static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
+        jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
+        return;
+    }
+    mmtk_handle_user_collection_request(ptls, collection);
+}
+
+// FIXME: The functions combine_thread_gc_counts and reset_thread_gc_counts
+// are currently nearly identical for mmtk and for stock. However, the stats
+// are likely different (e.g., MMTk doesn't track the bytes allocated in the fastpath,
+// but only when the slowpath is called). We might need to adapt these later so that
+// the statistics are the same or as close as possible for each GC.
+
+static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls) {
+            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval);
+            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc);
+            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc);
+            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc);
+            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc);
+            dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc);
+            if (update_heap) {
+                jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+                jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
+            }
+        }
+    }
+}
+
+
+void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls != NULL) {
+            // don't reset `pool_live_bytes` here
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
+        }
+    }
+}
+
+// Retrieves Julia's `GC_Num` (structure that stores GC statistics).
+JL_DLLEXPORT jl_gc_num_t jl_gc_num(void) {
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num, 0);
+    return num;
+}
+
+int64_t last_gc_total_bytes = 0;
+int64_t last_live_bytes = 0; // live_bytes at last collection
+int64_t live_bytes = 0;
+
+JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT {
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT
+{
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb - offset;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void) {
+    return 0;
+}
+
+void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
+}
+
+void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
+{
+}
+
+int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
+{
+    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
+    return live_bytes += inc;
+}
+
+void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
+{
+    combine_thread_gc_counts(&gc_num, 0);
+    inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
+    gc_num.allocd = 0;
+    gc_num.deferred_alloc = 0;
+    reset_thread_gc_counts();
+}
+
+JL_DLLEXPORT int64_t jl_gc_live_bytes(void) {
+    return last_live_bytes;
+}
+
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
+{
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num, 0);
+    // Sync this logic with `base/util.jl:GC_Diff`
+    *bytes = (num.total_allocd + num.deferred_alloc + num.allocd);
+}
+
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
+{
+    // FIXME: should probably return MMTk's heap size
+    return max_total_memory;
+}
+
+// weak references
+// ---
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
+{
+    jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type);
+    wr->value = value;  // NOTE: wb not needed here
+    mmtk_add_weak_candidate(wr);
+    return wr;
+}
+
+// allocation
+
+extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
+extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
+extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator);
+extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator);
+extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
+extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS;
+extern void mmtk_store_obj_size_c(void* obj, size_t size);
+
+#define MMTK_DEFAULT_IMMIX_ALLOCATOR (0)
+#define MMTK_IMMORTAL_BUMP_ALLOCATOR (0)
+
+
+int jl_gc_classify_pools(size_t sz, int *osize)
+{
+    if (sz > GC_MAX_SZCLASS)
+        return -1; // call big alloc function
+    size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    *osize = LLT_ALIGN(allocsz, 16);
+    return 0; // use MMTk's fastpath logic
+}
+#define MMTK_MIN_ALIGNMENT 4
+// MMTk assumes allocation size is aligned to min alignment.
+inline size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT
+{
+    return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1);
+}
+
+inline void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* cursor, uintptr_t limit, size_t size, size_t align, size_t offset, int allocator) {
+    intptr_t delta = (-offset - *cursor) & (align - 1);
+    uintptr_t result = *cursor + (uintptr_t)delta;
+
+    if (__unlikely(result + size > limit)) {
+        return (void*) mmtk_alloc(mutator, size, align, offset, allocator);
+    } else{
+        *cursor = result + size;
+        return (void*)result;
+    }
+}
+
+inline void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
+    ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR];
+    return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0);
+}
+
+inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    mmtk_post_alloc(mutator, obj, size, 0);
+}
+
+inline void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    // FIXME: for now, we do nothing
+    // but when supporting moving, this is where we set the valid object (VO) bit
+}
+
+inline void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
+    BumpAllocator* allocator = &mutator->allocators.bump_pointer[MMTK_IMMORTAL_BUMP_ALLOCATOR];
+    return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1);
+}
+
+inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
+    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
+        intptr_t addr = (intptr_t) obj;
+        uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
+        intptr_t shift = (addr >> 3) & 0b111;
+        while(1) {
+            uint8_t old_val = *meta_addr;
+            uint8_t new_val = old_val | (1 << shift);
+            if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) {
+                break;
+            }
+        }
+    }
+}
+
+// allocation wrappers that track allocation and let collection run
+JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    void *data = malloc(sz);
+    if (data != NULL && pgcstack != NULL && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
+        malloc_maybe_collect(ptls, sz);
+        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz);
+    }
+    return data;
+}
+
+JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    void *data = calloc(nm, sz);
+    if (data != NULL && pgcstack != NULL && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
+        malloc_maybe_collect(ptls, nm * sz);
+        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz);
+    }
+    return data;
+}
+
+JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    free(p);
+    if (pgcstack != NULL && ct->world_age) {
+        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz);
+    }
+}
+
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
+{
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
+        malloc_maybe_collect(ptls, sz);
+        if (sz < old)
+            jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz);
+        else
+            jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old);
+    }
+    return realloc(p, sz);
+}
+
+void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    size_t allocsz = mmtk_align_alloc_sz(sz);
+    void* addr = mmtk_immortal_alloc_fast(&ptls->gc_tls.mmtk_mutator, allocsz, align, offset);
+    mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, jl_valueof(addr), allocsz);
+    return addr;
+}
+
+void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
+{
+    return jl_gc_perm_alloc_nolock(sz, zero, align, offset);
+}
+
+jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
+{
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
+                                                 sizeof(void*) * 2 : 16));
+    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
+                                                              sizeof(void*) % align);
+    o->header = (uintptr_t)ty;
+    return jl_valueof(o);
+}
+
+JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty)
+{
+    // safepoint
+    jl_gc_safepoint_(ptls);
+
+    jl_value_t *v;
+    if ((uintptr_t)ty != jl_buff_tag) {
+        // v needs to be 16 byte aligned, therefore v_tagged needs to be offset accordingly to consider the size of header
+        jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize, align), align, sizeof(jl_taggedvalue_t));
+        v = jl_valueof(v_tagged);
+        mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize, align));
+    } else {
+        // allocating an extra word to store the size of buffer objects
+        jl_taggedvalue_t *v_tagged = (jl_taggedvalue_t *)mmtk_immix_alloc_fast(&ptls->gc_tls.mmtk_mutator, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align), align, 0);
+        jl_value_t* v_tagged_aligned = ((jl_value_t*)((char*)(v_tagged) + sizeof(jl_taggedvalue_t)));
+        v = jl_valueof(v_tagged_aligned);
+        mmtk_store_obj_size_c(v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align));
+        mmtk_immix_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, v, LLT_ALIGN(osize+sizeof(jl_taggedvalue_t), align));
+    }
+
+    ptls->gc_tls_common.gc_num.allocd += osize;
+    ptls->gc_tls_common.gc_num.poolalloc++;
+
+    return v;
+}
+
+JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t sz)
+{
+    // safepoint
+    jl_gc_safepoint_(ptls);
+
+    size_t offs = offsetof(bigval_t, header);
+    assert(sz >= sizeof(jl_taggedvalue_t) && "sz must include tag");
+    static_assert(offsetof(bigval_t, header) >= sizeof(void*), "Empty bigval header?");
+    static_assert(sizeof(bigval_t) % JL_HEAP_ALIGNMENT == 0, "");
+    size_t allocsz = LLT_ALIGN(sz + offs, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz) { // overflow in adding offs, size was "negative"
+        assert(0 && "Error when allocating big object");
+        jl_throw(jl_memory_exception);
+    }
+
+    bigval_t *v = (bigval_t*)mmtk_alloc_large(&ptls->gc_tls.mmtk_mutator, allocsz, JL_CACHE_BYTE_ALIGNMENT, 0, 2);
+
+    if (v == NULL) {
+        assert(0 && "Allocation failed");
+        jl_throw(jl_memory_exception);
+    }
+    v->sz = allocsz;
+
+    ptls->gc_tls_common.gc_num.allocd += allocsz;
+    ptls->gc_tls_common.gc_num.bigalloc++;
+
+    jl_value_t *result = jl_valueof(&v->header);
+    mmtk_post_alloc(&ptls->gc_tls.mmtk_mutator, result, allocsz, 2);
+
+    return result;
+}
+
+// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type)
+{
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
+
+    jl_value_t *val = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL);
+    maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
+    return val;
+}
+
+// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type)
+{
+    // TODO: assertion needed here?
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
+
+    jl_value_t *val = jl_mmtk_gc_alloc_big(ptls, sz);
+    maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type);
+    return val;
+}
+
+inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    jl_value_t *v;
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    if (sz <= GC_MAX_SZCLASS) {
+        v = jl_mmtk_gc_alloc_default(ptls, allocsz, 16, ty);
+    }
+    else {
+        if (allocsz < sz) // overflow in adding offs, size was "negative"
+            jl_throw(jl_memory_exception);
+        v = jl_mmtk_gc_alloc_big(ptls, allocsz);
+    }
+    jl_set_typeof(v, ty);
+    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
+    return v;
+}
+
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    maybe_collect(ptls);
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz)  // overflow in adding offs, size was "negative"
+        jl_throw(jl_memory_exception);
+
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *b = malloc_cache_align(allocsz);
+    if (b == NULL)
+        jl_throw(jl_memory_exception);
+
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
+    // FIXME: Should these be part of mmtk's heap?
+    // malloc_maybe_collect(ptls, sz);
+    // jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, allocsz);
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    // jl_gc_managed_malloc is currently always used for allocating array buffers.
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
+    return b;
+}
+
+void jl_gc_notify_image_load(const char* img_data, size_t len)
+{
+    mmtk_set_vm_space((void*)img_data, len);
+}
+
+void jl_gc_notify_image_alloc(char* img_data, size_t len)
+{
+    mmtk_immortal_region_post_alloc((void*)img_data, len);
+}
+
+// mutex for page profile
+uv_mutex_t page_profile_lock;
+
+JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
+{
+    uv_mutex_lock(&page_profile_lock);
+    const char *str = "Page profiler in unsupported in MMTk.";
+    ios_write(stream, str, strlen(str));
+    uv_mutex_unlock(&page_profile_lock);
+}
+
+// this seems to be needed by the gc tests
+#define JL_GC_N_MAX_POOLS 51
+JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
+
+STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
+{
+    // FIXME: MMTk would have to provide its own stats
+}
+
+#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants
+
+JL_DLLEXPORT uint64_t jl_get_pg_size(void)
+{
+    return MMTK_GC_PAGE_SZ;
+}
+
+// Not used by mmtk
+// Number of GC threads that may run parallel marking
+int jl_n_markthreads;
+// Number of GC threads that may run concurrent sweeping (0 or 1)
+int jl_n_sweepthreads;
+// `tid` of first GC thread
+int gc_first_tid;
+
+// TODO: Move write barriers from julia.h and add them here
+
+// No inline write barrier -- only used for debugging
+JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
+{
+    jl_gc_wb_back(parent);
+}
+
+JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
+{
+    jl_gc_wb(parent, ptr);
+}
+
+JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, (const void*) 0);
+}
+
+JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr);
+}
+
+JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT
+{
+    mmtk_unreachable();
+}
+
+JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
+                                        struct _jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    mmtk_unreachable();
+}
+
+// marking
+// ---
+
+JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
+{
+    mmtk_unreachable();
+    return 0;
+}
+JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
+                                            jl_value_t **objs, size_t nobjs)
+{
+    mmtk_unreachable();
+}
+
+JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void)
+{
+    // TODO: meaningful for MMTk?
+    return GC_MAX_SZCLASS;
+}
+
+JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
+{
+    // FIXME: do we need to implement this?
+}
+
+// gc-debug functions
+// ---
+
+JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p)
+{
+    return NULL;
+}
+
+void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT
+{
+}
+
+int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT
+{
+    return 0;
+}
+
+void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT
+{
+    // May not be accurate but should be helpful enough
+    uint64_t pool_count = gc_num.poolalloc;
+    uint64_t big_count = gc_num.bigalloc;
+    jl_safe_printf("Allocations: %" PRIu64 " "
+                   "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n",
+                   pool_count + big_count, pool_count, big_count, gc_num.pause);
+}
+
+JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void)
+{
+    return sizeof(bigval_t);
+}
+
+void jl_print_gc_stats(JL_STREAM *s)
+{
+}
+
+JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
+{
+    return 0;
+}
+
+JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void)
+{
+    return 0;
+}
+
+// TODO: if this is needed, it can be added in MMTk
+JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
+{
+    return NULL;
+}
+
+extern unsigned char mmtk_pin_object(void* obj);
+
+JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) {
+    return mmtk_pin_object(obj);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MMTK_GC
diff --git a/src/gc-mmtk.h b/src/gc-mmtk.h
new file mode 100644
index 0000000000000..6c2c7a40bc81f
--- /dev/null
+++ b/src/gc-mmtk.h
@@ -0,0 +1,34 @@
+#ifdef MMTK_GC
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern jl_mutex_t finalizers_lock;
+extern arraylist_t to_finalize;
+extern arraylist_t finalizer_list_marked;
+
+JL_EXTENSION typedef struct _bigval_t {
+    size_t sz;
+#ifdef _P64 // Add padding so that the value is 64-byte aligned
+    // (8 pointers of 8 bytes each) - (2 other pointers in struct)
+    void *_padding[8 - 2];
+#else
+    // (16 pointers of 4 bytes each) - (2 other pointers in struct)
+    void *_padding[16 - 2];
+#endif
+    //struct jl_taggedvalue_t <>;
+    union {
+        uintptr_t header;
+        struct {
+            uintptr_t gc:2;
+        } bits;
+    };
+    // must be 64-byte aligned here, in 32 & 64 bit modes
+} bigval_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MMTK_GC
diff --git a/src/gc-page-profiler.c b/src/gc-page-profiler.c
index 05666c7a86af2..e5c6b91978731 100644
--- a/src/gc-page-profiler.c
+++ b/src/gc-page-profiler.c
@@ -1,8 +1,8 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #ifndef MMTK_GC
-
 #include "gc-page-profiler.h"
+#include "julia.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/gc-page-profiler.h b/src/gc-page-profiler.h
index 28989f8f8e206..0dd72ad072fa9 100644
--- a/src/gc-page-profiler.h
+++ b/src/gc-page-profiler.h
@@ -3,7 +3,7 @@
 #ifndef GC_PAGE_PROFILER_H
 #define GC_PAGE_PROFILER_H
 
-#include "gc.h"
+#include "gc-stock.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/gc-pages.c b/src/gc-pages.c
index 725d1c5e795d3..976fc461d5b95 100644
--- a/src/gc-pages.c
+++ b/src/gc-pages.c
@@ -1,8 +1,8 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #ifndef MMTK_GC
-
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
 #ifndef _OS_WINDOWS_
 #  include <sys/resource.h>
 #endif
@@ -11,6 +11,8 @@
 extern "C" {
 #endif
 
+uv_mutex_t gc_pages_lock;
+
 JL_DLLEXPORT uint64_t jl_get_pg_size(void)
 {
     return GC_PAGE_SZ;
@@ -69,7 +71,7 @@ char *jl_gc_try_alloc_pages_(int pg_cnt) JL_NOTSAFEPOINT
 // more chunks (or other allocations). The final page count is recorded
 // and will be used as the starting count next time. If the page count is
 // smaller `MIN_BLOCK_PG_ALLOC` a `jl_memory_exception` is thrown.
-// Assumes `gc_perm_lock` is acquired, the lock is released before the
+// Assumes `gc_pages_lock` is acquired, the lock is released before the
 // exception is thrown.
 char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT
 {
@@ -89,7 +91,7 @@ char *jl_gc_try_alloc_pages(void) JL_NOTSAFEPOINT
             block_pg_cnt = pg_cnt = min_block_pg_alloc;
         }
         else {
-            uv_mutex_unlock(&gc_perm_lock);
+            uv_mutex_unlock(&gc_pages_lock);
             jl_throw(jl_memory_exception);
         }
     }
@@ -129,11 +131,11 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
         goto exit;
     }
 
-    uv_mutex_lock(&gc_perm_lock);
+    uv_mutex_lock(&gc_pages_lock);
     // another thread may have allocated a large block while we were waiting...
     meta = pop_lf_back(&global_page_pool_clean);
     if (meta != NULL) {
-        uv_mutex_unlock(&gc_perm_lock);
+        uv_mutex_unlock(&gc_pages_lock);
         gc_alloc_map_set(meta->data, GC_PAGE_ALLOCATED);
         goto exit;
     }
@@ -151,7 +153,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
             push_lf_back(&global_page_pool_clean, pg);
         }
     }
-    uv_mutex_unlock(&gc_perm_lock);
+    uv_mutex_unlock(&gc_pages_lock);
 exit:
 #ifdef _OS_WINDOWS_
     VirtualAlloc(meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE);
diff --git a/src/gc-stacks.c b/src/gc-stacks.c
index 465dce7fda26b..a8fec938456a3 100644
--- a/src/gc-stacks.c
+++ b/src/gc-stacks.c
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "gc.h"
+#include "gc-common.h"
+#include "threading.h"
 #ifndef _OS_WINDOWS_
 #  include <sys/resource.h>
 #endif
@@ -22,22 +23,13 @@
 // number of stacks to always keep available per pool
 #define MIN_STACK_MAPPINGS_PER_POOL 5
 
-#if defined(_OS_WINDOWS_) || (!defined(_OS_OPENBSD_) && !defined(JL_HAVE_UCONTEXT) && !defined(JL_HAVE_SIGALTSTACK))
-#define JL_USE_GUARD_PAGE   1
 const size_t jl_guard_size = (4096 * 8);
-#else
-const size_t jl_guard_size = 0;
-#endif
-
 static _Atomic(uint32_t) num_stack_mappings = 0;
 
 #ifdef _OS_WINDOWS_
 #define MAP_FAILED NULL
 static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
 {
-    size_t guard_size = LLT_ALIGN(jl_guard_size, jl_page_size);
-    bufsz += guard_size;
-
     void *stk = VirtualAlloc(NULL, bufsz, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
     if (stk == NULL)
         return MAP_FAILED;
@@ -48,77 +40,49 @@ static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
         VirtualFree(stk, 0, MEM_RELEASE);
         return MAP_FAILED;
     }
-    stk = (char *)stk + guard_size;
 
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1);
     return stk;
 }
 
 
-static void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
+void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
-#ifdef JL_USE_GUARD_PAGE
-    size_t guard_size = LLT_ALIGN(jl_guard_size, jl_page_size);
-    bufsz += guard_size;
-    stkbuf = (char *)stkbuf - guard_size;
-#endif
-
     VirtualFree(stkbuf, 0, MEM_RELEASE);
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1);
 }
 
 #else
 
-# ifdef _OS_OPENBSD_
 static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
 {
-    void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
-    if (stk == MAP_FAILED)
-        return MAP_FAILED;
-
+# ifdef _OS_OPENBSD_
     // we don't set up a guard page to detect stack overflow: on OpenBSD, any
     // mmap-ed region has guard page managed by the kernel, so there is no
     // need for it. Additionally, a memory region used as stack (memory
     // allocated with MAP_STACK option) has strict permission, and you can't
     // "create" a guard page on such memory by using `mprotect` on it
-
-    jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1);
-    return stk;
-}
+    void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+    if (stk == MAP_FAILED)
+        return MAP_FAILED;
 # else
-static void *malloc_stack(size_t bufsz) JL_NOTSAFEPOINT
-{
-#ifdef JL_USE_GUARD_PAGE
-    size_t guard_size = LLT_ALIGN(jl_guard_size, jl_page_size);
-    bufsz += guard_size;
-#endif
-
     void* stk = mmap(0, bufsz, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     if (stk == MAP_FAILED)
         return MAP_FAILED;
 
-#ifdef JL_USE_GUARD_PAGE
     // set up a guard page to detect stack overflow
     if (mprotect(stk, jl_guard_size, PROT_NONE) == -1) {
         munmap(stk, bufsz);
         return MAP_FAILED;
     }
-    stk = (char *)stk + guard_size;
-#endif
+# endif
 
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, 1);
     return stk;
 }
-# endif
 
 void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
 {
-#ifdef JL_USE_GUARD_PAGE
-    size_t guard_size = LLT_ALIGN(jl_guard_size, jl_page_size);
-    bufsz += guard_size;
-    stkbuf = (char *)stkbuf - guard_size;
-#endif
-
     munmap(stkbuf, bufsz);
     jl_atomic_fetch_add_relaxed(&num_stack_mappings, -1);
 }
@@ -167,7 +131,7 @@ void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf);
             return;
         }
     }
@@ -185,18 +149,18 @@ JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
 {
     // avoid adding an original thread stack to the free list
-    if (task == ptls->root_task && !task->copy_stack)
+    if (task == ptls->root_task && !task->ctx.copy_stack)
         return;
-    void *stkbuf = task->stkbuf;
-    size_t bufsz = task->bufsz;
+    void *stkbuf = task->ctx.stkbuf;
+    size_t bufsz = task->ctx.bufsz;
     if (bufsz <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(bufsz);
         if (pool_sizes[pool_id] == bufsz) {
-            task->stkbuf = NULL;
+            task->ctx.stkbuf = NULL;
 #ifdef _COMPILER_ASAN_ENABLED_
             __asan_unpoison_stack_memory((uintptr_t)stkbuf, bufsz);
 #endif
-            small_arraylist_push(&ptls->gc_tls.heap.free_stacks[pool_id], stkbuf);
+            small_arraylist_push(&ptls->gc_tls_common.heap.free_stacks[pool_id], stkbuf);
         }
     }
 }
@@ -211,7 +175,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
         unsigned pool_id = select_pool(ssize);
         ssize = pool_sizes[pool_id];
-        small_arraylist_t *pool = &ptls->gc_tls.heap.free_stacks[pool_id];
+        small_arraylist_t *pool = &ptls->gc_tls_common.heap.free_stacks[pool_id];
         if (pool->len > 0) {
             stk = small_arraylist_pop(pool);
         }
@@ -232,7 +196,7 @@ JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPO
     }
     *bufsz = ssize;
     if (owner) {
-        small_arraylist_t *live_tasks = &ptls->gc_tls.heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls->gc_tls_common.heap.live_tasks;
         mtarraylist_push(live_tasks, owner);
     }
     return stk;
@@ -259,7 +223,7 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT
 
         // free half of stacks that remain unused since last sweep
         for (int p = 0; p < JL_N_STACK_POOLS; p++) {
-            small_arraylist_t *al = &ptls2->gc_tls.heap.free_stacks[p];
+            small_arraylist_t *al = &ptls2->gc_tls_common.heap.free_stacks[p];
             size_t n_to_free;
             if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
                 n_to_free = al->len; // not alive yet or dead, so it does not need these anymore
@@ -281,10 +245,10 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT
             }
         }
         if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
-            small_arraylist_free(ptls2->gc_tls.heap.free_stacks);
+            small_arraylist_free(ptls2->gc_tls_common.heap.free_stacks);
         }
 
-        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
         size_t n = 0;
         size_t ndel = 0;
         size_t l = live_tasks->len;
@@ -295,17 +259,17 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT
             jl_task_t *t = (jl_task_t*)lst[n];
             assert(jl_is_task(t));
             if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
-                if (t->stkbuf == NULL)
+                if (t->ctx.stkbuf == NULL)
                     ndel++; // jl_release_task_stack called
                 else
                     n++;
             }
             else {
                 ndel++;
-                void *stkbuf = t->stkbuf;
-                size_t bufsz = t->bufsz;
+                void *stkbuf = t->ctx.stkbuf;
+                size_t bufsz = t->ctx.bufsz;
                 if (stkbuf) {
-                    t->stkbuf = NULL;
+                    t->ctx.stkbuf = NULL;
                     _jl_free_stack(ptls2, stkbuf, bufsz);
                 }
 #ifdef _COMPILER_TSAN_ENABLED_
@@ -325,8 +289,6 @@ void sweep_stack_pools(void) JL_NOTSAFEPOINT
     }
 }
 
-extern int gc_first_tid;
-
 JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
 {
     size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
@@ -334,16 +296,12 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
     size_t l = 0; // l is not reset on restart, so we keep getting more aggressive at making a big enough list everything it fails
 restart:
     for (size_t i = 0; i < nthreads; i++) {
-        // skip GC threads since they don't have tasks
-        if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
-            continue;
-        }
         jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL)
             continue;
-        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
-        l += n + (ptls2->root_task->stkbuf != NULL);
+        l += n + (ptls2->root_task->ctx.stkbuf != NULL);
     }
     l += l / 20; // add 5% for margin of estimation error
     jl_array_t *a = jl_alloc_vec_any(l); // may gc, changing the number of tasks and forcing us to reload everything
@@ -351,24 +309,20 @@ JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
     allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
     size_t j = 0;
     for (size_t i = 0; i < nthreads; i++) {
-        // skip GC threads since they don't have tasks
-        if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
-            continue;
-        }
         jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL)
             continue;
         jl_task_t *t = ptls2->root_task;
-        if (t->stkbuf != NULL) {
+        if (t->ctx.stkbuf != NULL) {
             if (j == l)
                 goto restart;
             jl_array_data(a,void*)[j++] = t;
         }
-        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         for (size_t i = 0; i < n; i++) {
             jl_task_t *t = (jl_task_t*)mtarraylist_get(live_tasks, i);
-            if (t->stkbuf != NULL) {
+            if (t->ctx.stkbuf != NULL) {
                 if (j == l)
                     goto restart;
                 jl_array_data(a,void*)[j++] = t;
diff --git a/src/gc.c b/src/gc-stock.c
similarity index 84%
rename from src/gc.c
rename to src/gc-stock.c
index ed7188a1b449a..e99db4c54d17e 100644
--- a/src/gc.c
+++ b/src/gc-stock.c
@@ -1,8 +1,10 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #ifndef MMTK_GC
-
-#include "gc.h"
+#include "gc-common.h"
+#include "gc-stock.h"
+#include "gc-alloc-profiler.h"
+#include "gc-heap-snapshot.h"
 #include "gc-page-profiler.h"
 #include "julia.h"
 #include "julia_atomics.h"
@@ -16,6 +18,10 @@
 extern "C" {
 #endif
 
+// Number of GC threads that may run parallel marking
+int jl_n_markthreads;
+// Number of GC threads that may run concurrent sweeping (0 or 1)
+int jl_n_sweepthreads;
 // Number of threads currently running the GC mark-loop
 _Atomic(int) gc_n_threads_marking;
 // Number of threads sweeping
@@ -24,139 +30,19 @@ _Atomic(int) gc_n_threads_sweeping;
 _Atomic(jl_gc_padded_page_stack_t *) gc_allocd_scratch;
 // `tid` of mutator thread that triggered GC
 _Atomic(int) gc_master_tid;
-// Mutex/cond used to synchronize sleep/wakeup of GC threads
+// `tid` of first GC thread
+int gc_first_tid;
+// Mutex/cond used to synchronize wakeup of GC threads on parallel marking
 uv_mutex_t gc_threads_lock;
 uv_cond_t gc_threads_cond;
 // To indicate whether concurrent sweeping should run
 uv_sem_t gc_sweep_assists_needed;
 // Mutex used to coordinate entry of GC threads in the mark loop
 uv_mutex_t gc_queue_observer_lock;
-
-// Linked list of callback functions
-
-typedef void (*jl_gc_cb_func_t)(void);
-
-typedef struct jl_gc_callback_list_t {
-    struct jl_gc_callback_list_t *next;
-    jl_gc_cb_func_t func;
-} jl_gc_callback_list_t;
-
-static jl_gc_callback_list_t *gc_cblist_root_scanner;
-static jl_gc_callback_list_t *gc_cblist_task_scanner;
-static jl_gc_callback_list_t *gc_cblist_pre_gc;
-static jl_gc_callback_list_t *gc_cblist_post_gc;
-static jl_gc_callback_list_t *gc_cblist_notify_external_alloc;
-static jl_gc_callback_list_t *gc_cblist_notify_external_free;
-static jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
-
-#define gc_invoke_callbacks(ty, list, args) \
-    do { \
-        for (jl_gc_callback_list_t *cb = list; \
-                cb != NULL; \
-                cb = cb->next) \
-        { \
-            ((ty)(cb->func)) args; \
-        } \
-    } while (0)
-
-static void jl_gc_register_callback(jl_gc_callback_list_t **list,
-        jl_gc_cb_func_t func)
-{
-    while (*list != NULL) {
-        if ((*list)->func == func)
-            return;
-        list = &((*list)->next);
-    }
-    *list = (jl_gc_callback_list_t *)malloc_s(sizeof(jl_gc_callback_list_t));
-    (*list)->next = NULL;
-    (*list)->func = func;
-}
-
-static void jl_gc_deregister_callback(jl_gc_callback_list_t **list,
-        jl_gc_cb_func_t func)
-{
-    while (*list != NULL) {
-        if ((*list)->func == func) {
-            jl_gc_callback_list_t *tmp = *list;
-            (*list) = (*list)->next;
-            free(tmp);
-            return;
-        }
-        list = &((*list)->next);
-    }
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_root_scanner, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_task_scanner, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_pre_gc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_post_gc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_external_alloc, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_external_free, (jl_gc_cb_func_t)cb);
-}
-
-JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
-{
-    if (enable)
-        jl_gc_register_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
-    else
-        jl_gc_deregister_callback(&gc_cblist_notify_gc_pressure, (jl_gc_cb_func_t)cb);
-}
-
-void jl_gc_notify_image_load(const char* img_data, size_t len)
-{
-    // Do nothing
-}
-
-void jl_gc_notify_image_alloc(char* img_data, size_t len)
-{
-    // Do nothing
-}
-
-// Protect all access to `finalizer_list_marked` and `to_finalize`.
-// For accessing `ptls->finalizers`, the lock is needed if a thread
-// is going to realloc the buffer (of its own list) or accessing the
-// list of another thread
-static uv_mutex_t gc_cache_lock;
+// Tag for sentinel nodes in bigval list
+uintptr_t gc_bigval_sentinel_tag;
+// Table recording number of full GCs due to each reason
+JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
 
 // Flag that tells us whether we need to support conservative marking
 // of objects.
@@ -193,57 +79,11 @@ static _Atomic(int) support_conservative_marking = 0;
  * have proper support of GC transition in codegen, we should execute the
  * finalizers in unmanaged (GC safe) mode.
  */
-int next_sweep_full = 0;
-
-// List of marked big objects.  Not per-thread.  Accessed only by master thread.
-bigval_t *big_objects_marked = NULL;
-
-// -- Finalization --
-
-NOINLINE uintptr_t gc_get_stack_ptr(void)
-{
-    return (uintptr_t)jl_get_frame_addr();
-}
 
-void jl_gc_wait_for_the_world(jl_ptls_t* gc_all_tls_states, int gc_n_threads);
+gc_heapstatus_t gc_heap_stats = {0};
 
-// malloc wrappers, aligned allocation
-
-#if defined(_OS_WINDOWS_)
-inline void *jl_malloc_aligned(size_t sz, size_t align)
-{
-    return _aligned_malloc(sz ? sz : 1, align);
-}
-STATIC_INLINE void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    _aligned_free(p);
-}
-#else
-inline void *jl_malloc_aligned(size_t sz, size_t align)
-{
-#if defined(_P64) || defined(__APPLE__)
-    if (align <= 16)
-        return malloc(sz);
-#endif
-    void *ptr;
-    if (posix_memalign(&ptr, align, sz))
-        return NULL;
-    return ptr;
-}
-inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    free(p);
-}
-#endif
-
-STATIC_INLINE void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
-{
-    arraylist_push(&to_finalize, o);
-    arraylist_push(&to_finalize, f);
-    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
-    // release our lock, and that will have a release barrier by then
-    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
-}
+// List of big objects in oldest generation (`GC_OLD_MARKED`).  Not per-thread.  Accessed only by master thread.
+bigval_t *oldest_generation_of_bigvals = NULL;
 
 // explicitly scheduled objects for the sweepfunc callback
 static void gc_sweep_foreign_objs_in_list(arraylist_t *objs) JL_NOTSAFEPOINT
@@ -274,18 +114,38 @@ static void gc_sweep_foreign_objs(void) JL_NOTSAFEPOINT
     }
 }
 
+// GC knobs and self-measurement variables
+static int64_t last_gc_total_bytes = 0;
+
+// max_total_memory is a suggestion.  We try very hard to stay
+// under this limit, but we will go above it rather than halting.
+#ifdef _P64
+typedef uint64_t memsize_t;
+static const size_t default_collect_interval = 5600 * 1024 * sizeof(void*);
+static size_t total_mem;
+// We expose this to the user/ci as jl_gc_set_max_memory
+static memsize_t max_total_memory = (memsize_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024;
+#else
+typedef uint32_t memsize_t;
+static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*);
+// Work really hard to stay within 2GB
+// Alternative is to risk running out of address space
+// on 32 bit architectures.
+#define MAX32HEAP 1536 * 1024 * 1024
+static memsize_t max_total_memory = (memsize_t) MAX32HEAP;
+#endif
 // heuristic stuff for https://dl.acm.org/doi/10.1145/3563323
 // start with values that are in the target ranges to reduce transient hiccups at startup
 static uint64_t old_pause_time = 1e7; // 10 ms
 static uint64_t old_mut_time = 1e9; // 1 second
 static uint64_t old_heap_size = 0;
-extern uint64_t old_alloc_diff;
-extern uint64_t old_freed_diff;
+static uint64_t old_alloc_diff = default_collect_interval;
+static uint64_t old_freed_diff = default_collect_interval;
 static uint64_t gc_end_time = 0;
 static int thrash_counter = 0;
 static int thrashing = 0;
-
-extern uint64_t freed_in_runtime;
+// global variables for GC stats
+static uint64_t freed_in_runtime = 0;
 
 // Resetting the object to a young object, this is used when marking the
 // finalizer list to collect them the next time because the object is very
@@ -337,70 +197,35 @@ static int64_t scanned_bytes; // young bytes scanned while marking
 static int64_t perm_scanned_bytes; // old bytes scanned while marking
 int prev_sweep_full = 1;
 int current_sweep_full = 0;
+int next_sweep_full = 0;
 int under_pressure = 0;
 
 // Full collection heuristics
-extern int64_t live_bytes;
+static int64_t live_bytes = 0;
 static int64_t promoted_bytes = 0;
 static int64_t last_live_bytes = 0; // live_bytes at last collection
-static int64_t t_start = 0; // Time GC starts;
 #ifdef __GLIBC__
 // maxrss at last malloc_trim
 static int64_t last_trim_maxrss = 0;
 #endif
 
-static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
+static void gc_sync_cache(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) JL_NOTSAFEPOINT
 {
-    const int nbig = gc_cache->nbig_obj;
-    for (int i = 0; i < nbig; i++) {
-        void *ptr = gc_cache->big_obj[i];
-        bigval_t *hdr = (bigval_t*)gc_ptr_clear_tag(ptr, 1);
-        gc_big_object_unlink(hdr);
-        if (gc_ptr_tag(ptr, 1)) {
-            gc_big_object_link(hdr, &ptls->gc_tls.heap.big_objects);
-        }
-        else {
-            // Move hdr from `big_objects` list to `big_objects_marked list`
-            gc_big_object_link(hdr, &big_objects_marked);
-        }
-    }
-    gc_cache->nbig_obj = 0;
     perm_scanned_bytes += gc_cache->perm_scanned_bytes;
     scanned_bytes += gc_cache->scanned_bytes;
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
 }
 
-static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    uv_mutex_lock(&gc_cache_lock);
-    gc_sync_cache_nolock(ptls, &ptls->gc_tls.gc_cache);
-    uv_mutex_unlock(&gc_cache_lock);
-}
-
 // No other threads can be running marking at the same time
-static void gc_sync_all_caches_nolock(jl_ptls_t ptls)
+static void gc_sync_all_caches(jl_ptls_t ptls)
 {
     assert(gc_n_threads);
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL)
-            gc_sync_cache_nolock(ptls, &ptls2->gc_tls.gc_cache);
-    }
-}
-
-STATIC_INLINE void gc_queue_big_marked(jl_ptls_t ptls, bigval_t *hdr,
-                                       int toyoung) JL_NOTSAFEPOINT
-{
-    const int nentry = sizeof(ptls->gc_tls.gc_cache.big_obj) / sizeof(void*);
-    size_t nobj = ptls->gc_tls.gc_cache.nbig_obj;
-    if (__unlikely(nobj >= nentry)) {
-        gc_sync_cache(ptls);
-        nobj = 0;
+            gc_sync_cache(ptls, &ptls2->gc_tls.gc_cache);
     }
-    uintptr_t v = (uintptr_t)hdr;
-    ptls->gc_tls.gc_cache.big_obj[nobj] = (void*)(toyoung ? (v | 1) : v);
-    ptls->gc_tls.gc_cache.nbig_obj = nobj + 1;
 }
 
 // Atomically set the mark bit for object and return whether it was previously unmarked
@@ -439,16 +264,14 @@ STATIC_INLINE void gc_setmark_big(jl_ptls_t ptls, jl_taggedvalue_t *o,
     bigval_t *hdr = bigval_header(o);
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_tls.gc_cache.perm_scanned_bytes += hdr->sz;
-        gc_queue_big_marked(ptls, hdr, 0);
     }
     else {
         ptls->gc_tls.gc_cache.scanned_bytes += hdr->sz;
-        // We can't easily tell if the object is old or being promoted
-        // from the gc bits but if the `age` is `0` then the object
-        // must be already on a young list.
         if (mark_reset_age) {
+            assert(jl_atomic_load(&gc_n_threads_marking) == 0); // `mark_reset_age` is only used during single-threaded marking
             // Reset the object as if it was just allocated
-            gc_queue_big_marked(ptls, hdr, 1);
+            gc_big_object_unlink(hdr);
+            gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, hdr);
         }
     }
 }
@@ -518,7 +341,7 @@ void gc_setmark_buf(jl_ptls_t ptls, void *o, uint8_t mark_mode, size_t minsz) JL
     gc_setmark_buf_(ptls, o, mark_mode, minsz);
 }
 
-inline void maybe_collect(jl_ptls_t ptls)
+STATIC_INLINE void maybe_collect(jl_ptls_t ptls)
 {
     if (jl_atomic_load_relaxed(&gc_heap_stats.heap_size) >= jl_atomic_load_relaxed(&gc_heap_stats.heap_target) || jl_gc_debug_check_other()) {
         jl_gc_collect(JL_GC_AUTO);
@@ -530,13 +353,12 @@ inline void maybe_collect(jl_ptls_t ptls)
 
 // weak references
 
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
-                                                jl_value_t *value)
+JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
 {
     jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*),
                                                   jl_weakref_type);
     wr->value = value;  // NOTE: wb not needed here
-    small_arraylist_push(&ptls->gc_tls.heap.weak_refs, wr);
+    small_arraylist_push(&ptls->gc_tls_common.heap.weak_refs, wr);
     return wr;
 }
 
@@ -546,8 +368,8 @@ static void clear_weak_refs(void)
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
         if (ptls2 != NULL) {
-            size_t n, l = ptls2->gc_tls.heap.weak_refs.len;
-            void **lst = ptls2->gc_tls.heap.weak_refs.items;
+            size_t n, l = ptls2->gc_tls_common.heap.weak_refs.len;
+            void **lst = ptls2->gc_tls_common.heap.weak_refs.items;
             for (n = 0; n < l; n++) {
                 jl_weakref_t *wr = (jl_weakref_t*)lst[n];
                 if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
@@ -565,8 +387,8 @@ static void sweep_weak_refs(void)
         if (ptls2 != NULL) {
             size_t n = 0;
             size_t ndel = 0;
-            size_t l = ptls2->gc_tls.heap.weak_refs.len;
-            void **lst = ptls2->gc_tls.heap.weak_refs.items;
+            size_t l = ptls2->gc_tls_common.heap.weak_refs.len;
+            void **lst = ptls2->gc_tls_common.heap.weak_refs.items;
             if (l == 0)
                 continue;
             while (1) {
@@ -581,7 +403,7 @@ static void sweep_weak_refs(void)
                 lst[n] = lst[n + ndel];
                 lst[n + ndel] = tmp;
             }
-            ptls2->gc_tls.heap.weak_refs.len -= ndel;
+            ptls2->gc_tls_common.heap.weak_refs.len -= ndel;
         }
     }
 }
@@ -589,21 +411,24 @@ static void sweep_weak_refs(void)
 
 STATIC_INLINE void jl_batch_accum_heap_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
 {
-    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.alloc_acc) + sz;
+    uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc) + sz;
     if (alloc_acc < 16*1024)
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, alloc_acc);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, alloc_acc);
     else {
         jl_atomic_fetch_add_relaxed(&gc_heap_stats.heap_size, alloc_acc);
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.alloc_acc, 0);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
     }
 }
 
-
+STATIC_INLINE void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT
+{
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc) + sz);
+}
 
 // big value list
 
 // Size includes the tag and the tag is not cleared!!
-inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
+STATIC_INLINE jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
 {
     maybe_collect(ptls);
     size_t offs = offsetof(bigval_t, header);
@@ -618,81 +443,211 @@ inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
         jl_throw(jl_memory_exception);
     gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t,
         gc_cblist_notify_external_alloc, (v, allocsz));
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + allocsz);
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.bigalloc,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.bigalloc) + 1);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc) + 1);
     jl_batch_accum_heap_size(ptls, allocsz);
 #ifdef MEMDEBUG
     memset(v, 0xee, allocsz);
 #endif
     v->sz = allocsz;
-    gc_big_object_link(v, &ptls->gc_tls.heap.big_objects);
+    gc_big_object_link(ptls->gc_tls.heap.young_generation_of_bigvals, v);
     return jl_valueof(&v->header);
 }
 
-// Sweep list rooted at *pv, removing and freeing any unmarked objects.
-// Return pointer to last `next` field in the culled list.
-static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT
+
+// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type)
+{
+    jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
+    maybe_record_alloc_to_profile(val, sz, (jl_datatype_t*)type);
+    return val;
+}
+
+// This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline `jl_gc_big_alloc_inner`
+// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) {
+    return jl_gc_big_alloc_inner(ptls, sz);
+}
+
+FORCE_INLINE void sweep_unlink_and_free(bigval_t *v) JL_NOTSAFEPOINT
 {
-    bigval_t *v = *pv;
+    gc_big_object_unlink(v);
+    gc_num.freed += v->sz;
+    jl_atomic_store_relaxed(&gc_heap_stats.heap_size, jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - v->sz);
+#ifdef MEMDEBUG
+    memset(v, 0xbb, v->sz);
+#endif
+    gc_invoke_callbacks(jl_gc_cb_notify_external_free_t, gc_cblist_notify_external_free, (v));
+    jl_free_aligned(v);
+}
+
+static bigval_t *sweep_list_of_young_bigvals(bigval_t *young) JL_NOTSAFEPOINT
+{
+    bigval_t *last_node = young;
+    bigval_t *v = young->next; // skip the sentinel
+    bigval_t *old = oldest_generation_of_bigvals;
+    int sweep_full = current_sweep_full; // don't load the global in the hot loop
     while (v != NULL) {
         bigval_t *nxt = v->next;
         int bits = v->bits.gc;
         int old_bits = bits;
         if (gc_marked(bits)) {
-            pv = &v->next;
             if (sweep_full || bits == GC_MARKED) {
                 bits = GC_OLD;
+                last_node = v;
+            }
+            else { // `bits == GC_OLD_MARKED`
+                assert(bits == GC_OLD_MARKED);
+                // reached oldest generation, move from young list to old list
+                gc_big_object_unlink(v);
+                gc_big_object_link(old, v);
             }
             v->bits.gc = bits;
         }
         else {
-            // Remove v from list and free it
-            *pv = nxt;
-            if (nxt)
-                nxt->prev = pv;
-            gc_num.freed += v->sz;
-            jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
-                jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - (v->sz));
-#ifdef MEMDEBUG
-            memset(v, 0xbb, v->sz);
-#endif
-            gc_invoke_callbacks(jl_gc_cb_notify_external_free_t,
-                gc_cblist_notify_external_free, (v));
-            jl_free_aligned(v);
+            sweep_unlink_and_free(v);
         }
         gc_time_count_big(old_bits, bits);
         v = nxt;
     }
-    return pv;
+    return last_node;
 }
 
-static void sweep_big(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
+static void sweep_list_of_oldest_bigvals(bigval_t *young) JL_NOTSAFEPOINT
+{
+    bigval_t *v = oldest_generation_of_bigvals->next; // skip the sentinel
+    while (v != NULL) {
+        bigval_t *nxt = v->next;
+        assert(v->bits.gc == GC_OLD_MARKED);
+        v->bits.gc = GC_OLD;
+        gc_time_count_big(GC_OLD_MARKED, GC_OLD);
+        v = nxt;
+    }
+}
+
+static void sweep_big(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
     gc_time_big_start();
     assert(gc_n_threads);
+    bigval_t *last_node_in_my_list = NULL;
     for (int i = 0; i < gc_n_threads; i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[i];
-        if (ptls2 != NULL)
-            sweep_big_list(sweep_full, &ptls2->gc_tls.heap.big_objects);
+        if (ptls2 != NULL) {
+            bigval_t *last_node = sweep_list_of_young_bigvals(ptls2->gc_tls.heap.young_generation_of_bigvals);
+            if (ptls == ptls2) {
+                last_node_in_my_list = last_node;
+            }
+        }
     }
-    if (sweep_full) {
-        bigval_t **last_next = sweep_big_list(sweep_full, &big_objects_marked);
-        // Move all survivors from big_objects_marked list to the big_objects list of this thread.
-        if (ptls->gc_tls.heap.big_objects)
-            ptls->gc_tls.heap.big_objects->prev = last_next;
-        *last_next = ptls->gc_tls.heap.big_objects;
-        ptls->gc_tls.heap.big_objects = big_objects_marked;
-        if (ptls->gc_tls.heap.big_objects)
-            ptls->gc_tls.heap.big_objects->prev = &ptls->gc_tls.heap.big_objects;
-        big_objects_marked = NULL;
+    if (current_sweep_full) {
+        sweep_list_of_oldest_bigvals(ptls->gc_tls.heap.young_generation_of_bigvals);
+        // move all nodes in `oldest_generation_of_bigvals` to my list of bigvals
+        assert(last_node_in_my_list != NULL);
+        assert(last_node_in_my_list->next == NULL);
+        last_node_in_my_list->next = oldest_generation_of_bigvals->next; // skip the sentinel
+        if (oldest_generation_of_bigvals->next != NULL) {
+            oldest_generation_of_bigvals->next->prev = last_node_in_my_list;
+        }
+        oldest_generation_of_bigvals->next = NULL;
     }
     gc_time_big_end();
 }
 
-// tracking Memorys with malloc'd storage
-extern void jl_gc_free_memory(jl_value_t *v, int isaligned);
+void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
+    jl_batch_accum_heap_size(ptls, sz);
+}
+
+void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT
+{
+    jl_batch_accum_free_size(jl_current_task->ptls, sz);
+}
+
+// Only safe to update the heap inside the GC
+static void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls) {
+            dest->allocd += (jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval);
+            dest->malloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc);
+            dest->realloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc);
+            dest->poolalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc);
+            dest->bigalloc += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.bigalloc);
+            dest->freed += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc);
+            if (update_heap) {
+                uint64_t alloc_acc = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc);
+                freed_in_runtime += jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.free_acc);
+                jl_atomic_store_relaxed(&gc_heap_stats.heap_size, alloc_acc + jl_atomic_load_relaxed(&gc_heap_stats.heap_size));
+                jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+                jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
+            }
+        }
+    }
+}
+
+static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
+{
+    int gc_n_threads;
+    jl_ptls_t* gc_all_tls_states;
+    gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    for (int i = 0; i < gc_n_threads; i++) {
+        jl_ptls_t ptls = gc_all_tls_states[i];
+        if (ptls != NULL) {
+            // don't reset `pool_live_bytes` here
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.bigalloc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.alloc_acc, 0);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.free_acc, 0);
+        }
+    }
+}
+
+static int64_t inc_live_bytes(int64_t inc) JL_NOTSAFEPOINT
+{
+    jl_timing_counter_inc(JL_TIMING_COUNTER_HeapSize, inc);
+    return live_bytes += inc;
+}
+
+void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
+{
+    combine_thread_gc_counts(&gc_num, 0);
+    inc_live_bytes(gc_num.deferred_alloc + gc_num.allocd);
+    gc_num.allocd = 0;
+    gc_num.deferred_alloc = 0;
+    reset_thread_gc_counts();
+}
+
+static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
+{
+    assert(jl_is_genericmemory(v));
+    jl_genericmemory_t *m = (jl_genericmemory_t*)v;
+    assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
+    char *d = (char*)m->ptr;
+    if (isaligned)
+        jl_free_aligned(d);
+    else
+        free(d);
+    jl_atomic_store_relaxed(&gc_heap_stats.heap_size,
+        jl_atomic_load_relaxed(&gc_heap_stats.heap_size) - jl_genericmemory_nbytes(m));
+    gc_num.freed += jl_genericmemory_nbytes(m);
+    gc_num.freecall++;
+}
+
 static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
 {
     gc_time_mallocd_memory_start();
@@ -700,10 +655,10 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
     for (int t_i = 0; t_i < gc_n_threads; t_i++) {
         jl_ptls_t ptls2 = gc_all_tls_states[t_i];
         if (ptls2 != NULL) {
-            mallocarray_t *ma = ptls2->gc_tls.heap.mallocarrays;
-            mallocarray_t **pma = &ptls2->gc_tls.heap.mallocarrays;
+            mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays;
+            mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays;
             while (ma != NULL) {
-                mallocarray_t *nxt = ma->next;
+                mallocmemory_t *nxt = ma->next;
                 jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
                 int bits = jl_astaggedvalue(a)->bits.gc;
                 if (gc_marked(bits)) {
@@ -713,8 +668,8 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
                     *pma = nxt;
                     int isaligned = (uintptr_t)ma->a & 1;
                     jl_gc_free_memory(a, isaligned);
-                    ma->next = ptls2->gc_tls.heap.mafreelist;
-                    ptls2->gc_tls.heap.mafreelist = ma;
+                    ma->next = ptls2->gc_tls_common.heap.mafreelist;
+                    ptls2->gc_tls_common.heap.mafreelist = ma;
                 }
                 gc_time_count_mallocd_memory(bits);
                 ma = nxt;
@@ -749,7 +704,7 @@ pagetable_t alloc_map;
 static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 {
     // Do not pass in `ptls` as argument. This slows down the fast path
-    // in pool_alloc significantly
+    // in small_alloc significantly
     jl_ptls_t ptls = jl_current_task->ptls;
     jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
     pg->osize = p->osize;
@@ -763,24 +718,24 @@ static NOINLINE jl_taggedvalue_t *gc_add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 }
 
 // Size includes the tag and the tag is not cleared!!
-inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset,
+STATIC_INLINE jl_value_t *jl_gc_small_alloc_inner(jl_ptls_t ptls, int offset,
                                           int osize)
 {
     // Use the pool offset instead of the pool address as the argument
     // to workaround a llvm bug.
     // Ref https://llvm.org/bugs/show_bug.cgi?id=27190
-    jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + pool_offset);
+    jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + offset);
     assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
 #ifdef MEMDEBUG
     return jl_gc_big_alloc(ptls, osize, NULL);
 #endif
     maybe_collect(ptls);
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + osize);
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + osize);
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.poolalloc,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.poolalloc) + 1);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.poolalloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.poolalloc) + 1);
     // first try to use the freelist
     jl_taggedvalue_t *v = p->freelist;
     if (v != NULL) {
@@ -820,6 +775,43 @@ inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset,
     return jl_valueof(v);
 }
 
+// Instrumented version of jl_gc_small_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_small_alloc(jl_ptls_t ptls, int offset, int osize, jl_value_t* type)
+{
+    jl_value_t *val = jl_gc_small_alloc_inner(ptls, offset, osize);
+    maybe_record_alloc_to_profile(val, osize, (jl_datatype_t*)type);
+    return val;
+}
+
+// This wrapper exists only to prevent `jl_gc_small_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline `jl_gc_small_alloc_inner`
+// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset, int osize) {
+    return jl_gc_small_alloc_inner(ptls, offset, osize);
+}
+
+// Size does NOT include the type tag!!
+inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
+{
+    jl_value_t *v;
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    if (sz <= GC_MAX_SZCLASS) {
+        int pool_id = jl_gc_szclass(allocsz);
+        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
+        int osize = jl_gc_sizeclasses[pool_id];
+        // We call `jl_gc_small_alloc_noinline` instead of `jl_gc_small_alloc` to avoid double-counting in
+        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+        v = jl_gc_small_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
+    }
+    else {
+        if (allocsz < sz) // overflow in adding offs, size was "negative"
+            jl_throw(jl_memory_exception);
+        v = jl_gc_big_alloc_noinline(ptls, allocsz);
+    }
+    jl_set_typeof(v, ty);
+    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
+    return v;
+}
 
 int jl_gc_classify_pools(size_t sz, int *osize)
 {
@@ -980,8 +972,8 @@ static void gc_sweep_page(gc_page_profiler_serializer_t *s, jl_gc_pool_t *p, jl_
     // instead of adding it to the thread that originally allocated the page, so we can avoid
     // an atomic-fetch-add here.
     size_t delta = (GC_PAGE_SZ - GC_PAGE_OFFSET - nfree * osize);
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes,
-        jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.pool_live_bytes) + delta);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.pool_live_bytes) + delta);
     jl_atomic_fetch_add_relaxed((_Atomic(int64_t) *)&gc_num.freed, (nfree - old_nfree) * osize);
 }
 
@@ -1002,7 +994,7 @@ static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
     sweep_stack_pools();
     gc_sweep_foreign_objs();
     sweep_malloced_memory();
-    sweep_big(ptls, sweep_full);
+    sweep_big(ptls);
     jl_engine_sweep(gc_all_tls_states);
 }
 
@@ -1237,7 +1229,7 @@ static void gc_sweep_pool(void)
             }
             continue;
         }
-        jl_atomic_store_relaxed(&ptls2->gc_tls.gc_num.pool_live_bytes, 0);
+        jl_atomic_store_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes, 0);
         for (int i = 0; i < JL_GC_N_POOLS; i++) {
             jl_gc_pool_t *p = &ptls2->gc_tls.heap.norm_pools[i];
             jl_taggedvalue_t *last = p->freelist;
@@ -2149,9 +2141,9 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                                         (ta, tid != -1 && ta == gc_all_tls_states[tid]->root_task));
                 }
         #ifdef COPY_STACKS
-                void *stkbuf = ta->stkbuf;
-                if (stkbuf && ta->copy_stack) {
-                    gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
+                void *stkbuf = ta->ctx.stkbuf;
+                if (stkbuf && ta->ctx.copy_stack) {
+                    gc_setmark_buf_(ptls, stkbuf, bits, ta->ctx.bufsz);
                     // For gc_heap_snapshot_record:
                     // TODO: attribute size of stack
                     // TODO: edge to stack data
@@ -2164,12 +2156,12 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
                 uintptr_t lb = 0;
                 uintptr_t ub = (uintptr_t)-1;
         #ifdef COPY_STACKS
-                if (stkbuf && ta->copy_stack && !ta->ptls) {
+                if (stkbuf && ta->ctx.copy_stack && !ta->ptls) {
                     int16_t tid = jl_atomic_load_relaxed(&ta->tid);
                     assert(tid >= 0);
                     jl_ptls_t ptls2 = gc_all_tls_states[tid];
                     ub = (uintptr_t)ptls2->stackbase;
-                    lb = ub - ta->copy_stack;
+                    lb = ub - ta->ctx.copy_stack;
                     offset = (uintptr_t)stkbuf - lb;
                 }
         #endif
@@ -2312,6 +2304,16 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
         if (npointers == 0)
             return;
         uintptr_t nptr = (npointers << 2 | (bits & GC_OLD));
+        if (vt == jl_binding_partition_type) {
+            // BindingPartition has a special union of jl_value_t and flag bits
+            // but is otherwise regular.
+            jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_valueof(o);
+            jl_value_t *val = decode_restriction_value(
+                jl_atomic_load_relaxed(&bpart->restriction));
+            if (val)
+                gc_heap_snapshot_record_binding_partition_edge((jl_value_t*)bpart, val);
+            gc_try_claim_and_push(mq, val, &nptr);
+        }
         assert((layout->nfields > 0 || layout->flags.fielddesc_type == 3) &&
                "opaque types should have been handled specially");
         if (layout->flags.fielddesc_type == 0) {
@@ -2402,8 +2404,6 @@ JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls)
     gc_drain_own_chunkqueue(ptls, &ptls->gc_tls.mark_queue);
 }
 
-extern int gc_first_tid;
-
 void gc_mark_and_steal(jl_ptls_t ptls)
 {
     int master_tid = jl_atomic_load(&gc_master_tid);
@@ -2789,6 +2789,63 @@ static void sweep_finalizer_list(arraylist_t *list)
     list->len = j;
 }
 
+int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT {
+    return gc_is_parallel_collector_thread(tid) || gc_is_concurrent_collector_thread(tid);
+}
+
+JL_DLLEXPORT void jl_gc_get_total_bytes(int64_t *bytes) JL_NOTSAFEPOINT
+{
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num, 0);
+    // Sync this logic with `base/util.jl:GC_Diff`
+    *bytes = (num.total_allocd + num.deferred_alloc + num.allocd);
+}
+
+JL_DLLEXPORT jl_gc_num_t jl_gc_num(void)
+{
+    jl_gc_num_t num = gc_num;
+    combine_thread_gc_counts(&num, 0);
+    return num;
+}
+
+// TODO: these were supposed to be thread local
+JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT
+{
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT
+{
+    int64_t oldtb = last_gc_total_bytes;
+    int64_t newtb;
+    jl_gc_get_total_bytes(&newtb);
+    last_gc_total_bytes = newtb - offset;
+    return newtb - oldtb;
+}
+
+JL_DLLEXPORT int64_t jl_gc_pool_live_bytes(void)
+{
+    int n_threads = jl_atomic_load_acquire(&jl_n_threads);
+    jl_ptls_t *all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
+    int64_t pool_live_bytes = 0;
+    for (int i = 0; i < n_threads; i++) {
+        jl_ptls_t ptls2 = all_tls_states[i];
+        if (ptls2 != NULL) {
+            pool_live_bytes += jl_atomic_load_relaxed(&ptls2->gc_tls_common.gc_num.pool_live_bytes);
+        }
+    }
+    return pool_live_bytes;
+}
+
+JL_DLLEXPORT int64_t jl_gc_live_bytes(void)
+{
+    return live_bytes;
+}
+
 uint64_t jl_gc_smooth(uint64_t old_val, uint64_t new_val, double factor)
 {
     double est = factor * old_val + (1 - factor) * new_val;
@@ -2925,7 +2982,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     // marking is over
 
     // Flush everything in mark cache
-    gc_sync_all_caches_nolock(ptls);
+    gc_sync_all_caches(ptls);
 
 
     gc_verify(ptls);
@@ -2952,10 +3009,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     // we either free some space or get an OOM error.
     if (gc_sweep_always_full) {
         sweep_full = 1;
+        gc_count_full_sweep_reason(FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL);
     }
     if (collection == JL_GC_FULL && !prev_sweep_full) {
         sweep_full = 1;
         recollect = 1;
+        gc_count_full_sweep_reason(FULL_SWEEP_REASON_FORCED_FULL_SWEEP);
     }
     if (sweep_full) {
         // these are the difference between the number of gc-perm bytes scanned
@@ -3091,10 +3150,17 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     }
 
     double old_ratio = (double)promoted_bytes/(double)heap_size;
-    if (heap_size > user_max || old_ratio > 0.15)
+    if (heap_size > user_max) {
         next_sweep_full = 1;
-    else
+        gc_count_full_sweep_reason(FULL_SWEEP_REASON_USER_MAX_EXCEEDED);
+    }
+    else if (old_ratio > 0.15) {
+        next_sweep_full = 1;
+        gc_count_full_sweep_reason(FULL_SWEEP_REASON_LARGE_PROMOTION_RATE);
+    }
+    else {
         next_sweep_full = 0;
+    }
     if (heap_size > user_max || thrashing)
         under_pressure = 1;
     // sweeping is over
@@ -3116,13 +3182,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
         }
         // free empty GC state for threads that have exited
         if (jl_atomic_load_relaxed(&ptls2->current_task) == NULL) {
-            if (gc_is_parallel_collector_thread(t_i))
-                continue;
+            // GC threads should never exit
+            assert(!gc_is_collector_thread(t_i));
+            jl_thread_heap_common_t *common_heap = &ptls2->gc_tls_common.heap;
             jl_thread_heap_t *heap = &ptls2->gc_tls.heap;
-            if (heap->weak_refs.len == 0)
-                small_arraylist_free(&heap->weak_refs);
-            if (heap->live_tasks.len == 0)
-                small_arraylist_free(&heap->live_tasks);
+            if (common_heap->weak_refs.len == 0)
+                small_arraylist_free(&common_heap->weak_refs);
+            if (common_heap->live_tasks.len == 0)
+                small_arraylist_free(&common_heap->live_tasks);
             if (heap->remset.len == 0)
                 arraylist_free(&heap->remset);
             if (ptls2->finalizers.len == 0)
@@ -3157,7 +3224,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     live_bytes += -gc_num.freed + gc_num.allocd;
     jl_timing_counter_dec(JL_TIMING_COUNTER_HeapSize, gc_num.freed);
 
-    gc_time_summary(sweep_full, t_start, gc_end_time, gc_num.freed,
+    gc_time_summary(sweep_full, gc_start_time, gc_end_time, gc_num.freed,
                     live_bytes, gc_num.interval, pause,
                     gc_num.time_to_safepoint,
                     gc_num.mark_time, gc_num.sweep_time);
@@ -3191,8 +3258,8 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
     if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
-        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval;
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
         static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
         jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
         return;
@@ -3294,11 +3361,10 @@ void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
     gc_mark_roots(mq);
 }
 
-// allocator entry points
-
 // Per-thread initialization
 void jl_init_thread_heap(jl_ptls_t ptls)
 {
+    jl_thread_heap_common_t *common_heap = &ptls->gc_tls_common.heap;
     jl_thread_heap_t *heap = &ptls->gc_tls.heap;
     jl_gc_pool_t *p = heap->norm_pools;
     for (int i = 0; i < JL_GC_N_POOLS; i++) {
@@ -3306,13 +3372,15 @@ void jl_init_thread_heap(jl_ptls_t ptls)
         p[i].freelist = NULL;
         p[i].newpages = NULL;
     }
-    small_arraylist_new(&heap->weak_refs, 0);
-    small_arraylist_new(&heap->live_tasks, 0);
+    small_arraylist_new(&common_heap->weak_refs, 0);
+    small_arraylist_new(&common_heap->live_tasks, 0);
     for (int i = 0; i < JL_N_STACK_POOLS; i++)
-        small_arraylist_new(&heap->free_stacks[i], 0);
-    heap->mallocarrays = NULL;
-    heap->mafreelist = NULL;
-    heap->big_objects = NULL;
+        small_arraylist_new(&common_heap->free_stacks[i], 0);
+    common_heap->mallocarrays = NULL;
+    common_heap->mafreelist = NULL;
+    heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
+    assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
+    heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;
     arraylist_new(&heap->remset, 0);
     arraylist_new(&ptls->finalizers, 0);
     arraylist_new(&ptls->gc_tls.sweep_objs, 0);
@@ -3320,7 +3388,6 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache;
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
-    gc_cache->nbig_obj = 0;
 
     // Initialize GC mark-queue
     jl_gc_markqueue_t *mq = &ptls->gc_tls.mark_queue;
@@ -3336,13 +3403,8 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     jl_atomic_store_relaxed(&q->array, wsa2);
     arraylist_new(&mq->reclaim_set, 32);
 
-    memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num));
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
-}
-
-void jl_deinit_thread_heap(jl_ptls_t ptls)
-{
-    // Do nothing
+    memset(&ptls->gc_tls_common.gc_num, 0, sizeof(ptls->gc_tls_common.gc_num));
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd, -(int64_t)gc_num.interval);
 }
 
 void jl_free_thread_gc_state(jl_ptls_t ptls)
@@ -3357,18 +3419,116 @@ void jl_free_thread_gc_state(jl_ptls_t ptls)
     arraylist_free(&mq->reclaim_set);
 }
 
+void jl_start_gc_threads(void)
+{
+    int nthreads = jl_atomic_load_relaxed(&jl_n_threads);
+    int ngcthreads = jl_n_gcthreads;
+    int nmutator_threads = nthreads - ngcthreads;
+    uv_thread_t uvtid;
+    for (int i = nmutator_threads; i < nthreads; ++i) {
+        jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
+        t->tid = i;
+        t->barrier = &thread_init_done;
+        if (i == nthreads - 1 && jl_n_sweepthreads == 1) {
+            uv_thread_create(&uvtid, jl_concurrent_gc_threadfun, t);
+        }
+        else {
+            uv_thread_create(&uvtid, jl_parallel_gc_threadfun, t);
+        }
+        uv_thread_detach(&uvtid);
+    }
+}
+
+STATIC_INLINE int may_mark(void) JL_NOTSAFEPOINT
+{
+    return (jl_atomic_load(&gc_n_threads_marking) > 0);
+}
+
+STATIC_INLINE int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    return (jl_atomic_load(&ptls->gc_tls.gc_sweeps_requested) > 0);
+}
+
+// parallel gc thread function
+void jl_parallel_gc_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid and create heap)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+    void *stack_lo, *stack_hi;
+    jl_init_stack_limits(0, &stack_lo, &stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_PARALLEL_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
+    uv_barrier_wait(targ->barrier);
+
+    // free the thread argument here
+    free(targ);
+
+    while (1) {
+        uv_mutex_lock(&gc_threads_lock);
+        while (!may_mark() && !may_sweep(ptls)) {
+            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
+        }
+        uv_mutex_unlock(&gc_threads_lock);
+        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+        gc_mark_loop_parallel(ptls, 0);
+        if (may_sweep(ptls)) {
+            assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
+            gc_sweep_pool_parallel(ptls);
+            jl_atomic_fetch_add(&ptls->gc_tls.gc_sweeps_requested, -1);
+        }
+    }
+}
+
+// concurrent gc thread function
+void jl_concurrent_gc_threadfun(void *arg)
+{
+    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+
+    // initialize this thread (set tid and create heap)
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+    void *stack_lo, *stack_hi;
+    jl_init_stack_limits(0, &stack_lo, &stack_hi);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
+    // wait for all threads
+    jl_gc_state_set(ptls, JL_GC_CONCURRENT_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
+    uv_barrier_wait(targ->barrier);
+
+    // free the thread argument here
+    free(targ);
+
+    while (1) {
+        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_CONCURRENT_COLLECTOR_THREAD);
+        uv_sem_wait(&gc_sweep_assists_needed);
+        gc_free_pages();
+    }
+}
+
 // System-wide initializations
 void jl_gc_init(void)
 {
     JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
     JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
     uv_mutex_init(&page_profile_lock);
-    uv_mutex_init(&gc_cache_lock);
     uv_mutex_init(&gc_perm_lock);
+    uv_mutex_init(&gc_pages_lock);
     uv_mutex_init(&gc_threads_lock);
     uv_cond_init(&gc_threads_cond);
     uv_sem_init(&gc_sweep_assists_needed, 0);
     uv_mutex_init(&gc_queue_observer_lock);
+    void *_addr = (void*)calloc_s(1); // dummy allocation to get the sentinel tag
+    uintptr_t addr = (uintptr_t)_addr;
+    gc_bigval_sentinel_tag = addr;
+    oldest_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
+    oldest_generation_of_bigvals->header = gc_bigval_sentinel_tag;
 
     jl_gc_init_page();
     jl_gc_debug_init();
@@ -3377,7 +3537,6 @@ void jl_gc_init(void)
     arraylist_new(&to_finalize, 0);
     jl_atomic_store_relaxed(&gc_heap_stats.heap_target, default_collect_interval);
     gc_num.interval = default_collect_interval;
-    last_long_collect_interval = default_collect_interval;
     gc_num.allocd = 0;
     gc_num.max_pause = 0;
     gc_num.max_memory = 0;
@@ -3398,8 +3557,19 @@ void jl_gc_init(void)
             hint = min_heap_size_hint;
         jl_gc_set_max_memory(hint - mem_reserve);
     }
+}
+
+JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem)
+{
+#ifdef _P32
+    max_mem = max_mem < MAX32HEAP ? max_mem : MAX32HEAP;
+#endif
+    max_total_memory = max_mem;
+}
 
-    t_start = jl_hrtime();
+JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void)
+{
+    return max_total_memory;
 }
 
 // allocation wrappers that track allocation and let collection run
@@ -3412,10 +3582,10 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
     if (data != NULL && pgcstack != NULL && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + sz);
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
         jl_batch_accum_heap_size(ptls, sz);
     }
     return data;
@@ -3429,16 +3599,15 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
     if (data != NULL && pgcstack != NULL && ct->world_age) {
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
-            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + nm*sz);
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.malloc,
-            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.malloc) + 1);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + nm*sz);
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
         jl_batch_accum_heap_size(ptls, sz * nm);
     }
     return data;
 }
 
-extern void jl_batch_accum_free_size(jl_ptls_t ptls, uint64_t sz) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
     jl_gcframe_t **pgcstack = jl_get_pgcstack();
@@ -3458,10 +3627,10 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
         jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (!(sz < old))
-            jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd,
-                jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + (sz - old));
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.realloc,
-            jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.realloc) + 1);
+            jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+                jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + (sz - old));
+        jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.realloc,
+            jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.realloc) + 1);
 
         int64_t diff = sz - old;
         if (diff < 0) {
@@ -3474,14 +3643,36 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size
     return data;
 }
 
-//_unchecked_calloc does not check for potential overflow of nm*sz
-STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
-    size_t nmsz = nm*sz;
-    int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
-    if (p == NULL)
-        return NULL;
-    p[0] = nmsz;
-    return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
+// allocating blocks for Arrays and Strings
+
+JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    maybe_collect(ptls);
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
+    if (allocsz < sz)  // overflow in adding offs, size was "negative"
+        jl_throw(jl_memory_exception);
+
+    int last_errno = errno;
+#ifdef _OS_WINDOWS_
+    DWORD last_error = GetLastError();
+#endif
+    void *b = malloc_cache_align(allocsz);
+    if (b == NULL)
+        jl_throw(jl_memory_exception);
+
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_tls_common.gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_tls_common.gc_num.malloc) + 1);
+    jl_batch_accum_heap_size(ptls, allocsz);
+#ifdef _OS_WINDOWS_
+    SetLastError(last_error);
+#endif
+    errno = last_errno;
+    // jl_gc_managed_malloc is currently always used for allocating array buffers.
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
+    return b;
 }
 
 // Perm gen allocator
@@ -3489,7 +3680,7 @@ STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
 #define GC_PERM_POOL_SIZE (2 * 1024 * 1024)
 // 20k limit for pool allocation. At most 1% fragmentation
 #define GC_PERM_POOL_LIMIT (20 * 1024)
-
+uv_mutex_t gc_perm_lock;
 static uintptr_t gc_perm_pool = 0;
 static uintptr_t gc_perm_end = 0;
 
@@ -3528,7 +3719,7 @@ STATIC_INLINE void *gc_try_perm_alloc_pool(size_t sz, unsigned align, unsigned o
 }
 
 // **NOT** a safepoint
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
+void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset) JL_NOTSAFEPOINT
 {
     // The caller should have acquired `gc_perm_lock`
     assert(align < GC_PERM_POOL_LIMIT);
@@ -3573,6 +3764,18 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
     return p;
 }
 
+jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
+{
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
+                                                 sizeof(void*) * 2 : 16));
+    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
+                                                              sizeof(void*) % align);
+    uintptr_t tag = (uintptr_t)ty;
+    o->header = tag | GC_OLD_MARKED;
+    return jl_valueof(o);
+}
+
 JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
 {
     if (jl_is_initialized()) {
@@ -3689,29 +3892,33 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
     return NULL;
 }
 
-// gc thread function
-void jl_gc_threadfun(void *arg)
+JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void)
 {
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
+    return GC_MAX_SZCLASS;
+}
 
-    // initialize this thread (set tid and create heap)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
+JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void)
+{
+    return sizeof(bigval_t);
+}
 
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_STATE_WAITING, 0);
-    uv_barrier_wait(targ->barrier);
+JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *obj)
+{
+    arraylist_push(&ptls->gc_tls.sweep_objs, obj);
+}
 
-    // free the thread argument here
-    free(targ);
+void jl_gc_notify_image_load(const char* img_data, size_t len)
+{
+    // Do nothing
+}
 
-    while (1) {
-        uv_mutex_lock(&gc_threads_lock);
-        while (jl_atomic_load(&gc_n_threads_marking) == 0) {
-            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
-        }
-        uv_mutex_unlock(&gc_threads_lock);
-        gc_mark_loop_parallel(ptls, 0);
-    }
+void jl_gc_notify_image_alloc(const char* img_data, size_t len)
+{
+    // Do nothing
+}
+
+JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) {
+    return 0;
 }
 
 // added for MMTk integration
diff --git a/src/gc.h b/src/gc-stock.h
similarity index 80%
rename from src/gc.h
rename to src/gc-stock.h
index c9320a6dbd837..686753fd37349 100644
--- a/src/gc.h
+++ b/src/gc-stock.h
@@ -4,8 +4,8 @@
   allocation and garbage collection
   . non-moving, precise mark and sweep collector
   . pool-allocates small objects, keeps big objects on a simple list
-  MMTk alternative
 */
+#ifndef MMTK_GC
 
 #ifndef JL_GC_H
 #define JL_GC_H
@@ -19,56 +19,12 @@
 #include "julia.h"
 #include "julia_threads.h"
 #include "julia_internal.h"
-#include "threading.h"
-#ifndef _OS_WINDOWS_
-#include <sys/mman.h>
-#if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-#endif
 #include "julia_assert.h"
-#include "gc-heap-snapshot.h"
-#include "gc-alloc-profiler.h"
-
-// interface from and to gc-common.c
-extern void maybe_collect(jl_ptls_t ptls);
-extern void run_finalizer(jl_task_t *ct, void *o, void *ff);
-extern void *jl_malloc_aligned(size_t sz, size_t align);
-extern void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz, size_t align);
-extern void jl_free_aligned(void *p);
-extern void *jl_gc_counted_calloc(size_t nm, size_t sz);
-extern void jl_gc_counted_free_with_size(void *p, size_t sz);
-extern void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz);
-extern void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f);
-extern void jl_finalize_th(jl_task_t *ct, jl_value_t *o);
-extern jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value);
-extern jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz);
-extern jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize);
-extern void jl_rng_split(uint64_t to[JL_RNG_SIZE], uint64_t from[JL_RNG_SIZE]);
-extern void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t oldsz,
-                                 int isaligned, jl_value_t *owner, int8_t can_collect);
-extern void run_finalizers(jl_task_t *ct, int finalizers_thread);
-
-#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
-#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)
-
-// common types and globals
-#ifdef _P64
-typedef uint64_t memsize_t;
-#else
-typedef uint32_t memsize_t;
-#endif
+#include "threading.h"
 
-extern const size_t default_collect_interval;
-extern const size_t max_collect_interval;
-extern size_t last_long_collect_interval;
-extern size_t total_mem;
-extern memsize_t max_total_memory;
-extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter;
-extern jl_mutex_t heapsnapshot_lock;
-extern uint64_t finalizer_rngState[];
-extern int gc_n_threads;
-extern jl_ptls_t* gc_all_tls_states;
+#ifdef __cplusplus
+extern "C" {
+#endif
 
 #ifdef GC_SMALL_PAGE
 #define GC_PAGE_LG2 12 // log2(size of a page)
@@ -78,137 +34,6 @@ extern jl_ptls_t* gc_all_tls_states;
 #define GC_PAGE_SZ (1 << GC_PAGE_LG2)
 #define GC_PAGE_OFFSET (JL_HEAP_ALIGNMENT - (sizeof(jl_taggedvalue_t) % JL_HEAP_ALIGNMENT))
 
-// This struct must be kept in sync with the Julia type of the same name in base/timing.jl
-typedef struct {
-    int64_t     allocd;
-    int64_t     deferred_alloc;
-    int64_t     freed;
-    uint64_t    malloc;
-    uint64_t    realloc;
-    uint64_t    poolalloc;
-    uint64_t    bigalloc;
-    uint64_t    freecall;
-    uint64_t    total_time;
-    uint64_t    total_allocd;
-    size_t      interval;
-    int         pause;
-    int         full_sweep;
-    uint64_t    max_pause;
-    uint64_t    max_memory;
-    uint64_t    time_to_safepoint;
-    uint64_t    max_time_to_safepoint;
-    uint64_t    total_time_to_safepoint;
-    uint64_t    sweep_time;
-    uint64_t    mark_time;
-    uint64_t    total_sweep_time;
-    uint64_t    total_mark_time;
-    uint64_t    last_full_sweep;
-    uint64_t    last_incremental_sweep;
-} jl_gc_num_t;
-
-typedef struct {
-    _Atomic(size_t) bytes_mapped;
-    _Atomic(size_t) bytes_resident;
-    _Atomic(size_t) heap_size;
-    _Atomic(size_t) heap_target;
-} gc_heapstatus_t;
-
-extern jl_gc_num_t gc_num;
-
-// data structure for tracking malloc'd arrays.
-typedef struct _mallocarray_t {
-    jl_value_t *a;
-    struct _mallocarray_t *next;
-} mallocarray_t;
-
-extern void combine_thread_gc_counts(jl_gc_num_t *dest, int update_heap);
-extern void reset_thread_gc_counts(void);
-
-// layout for big (>2k) objects
-JL_EXTENSION typedef struct _bigval_t {
-    struct _bigval_t *next;
-    struct _bigval_t **prev; // pointer to the next field of the prev entry
-    size_t sz;
-#ifdef _P64 // Add padding so that the value is 64-byte aligned
-    // (8 pointers of 8 bytes each) - (4 other pointers in struct)
-    void *_padding[8 - 4];
-#else
-    // (16 pointers of 4 bytes each) - (4 other pointers in struct)
-    void *_padding[16 - 4];
-#endif
-    //struct jl_taggedvalue_t <>;
-    union {
-        uintptr_t header;
-        struct {
-            uintptr_t gc:2;
-        } bits;
-    };
-    // must be 64-byte aligned here, in 32 & 64 bit modes
-} bigval_t;
-
-STATIC_INLINE uintptr_t gc_ptr_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
-{
-    return ((uintptr_t)v) & mask;
-}
-
-STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) JL_NOTSAFEPOINT
-{
-    return (void*)(((uintptr_t)v) & ~mask);
-}
-
-STATIC_INLINE int gc_marked(uintptr_t bits) JL_NOTSAFEPOINT
-{
-    return (bits & GC_MARKED) != 0;
-}
-
-#ifdef GC_VERIFY
-#error "GC_VERIFY is unsupported with MMTk"
-#endif
-
-#ifdef MEMFENCE
-#error "MEMFENCE is unsupported with MMTk"
-#endif
-
-#ifdef GC_DEBUG_ENV
-#error "GC_DEBUG_ENV is unsupported with MMTk"
-#endif
-
-#ifdef GC_FINAL_STATS
-#error "GC_FINAL_STATS is currently unsupported with MMTk"
-#endif
-
-#ifdef GC_TIME
-#error "GC_TIME is currently unsupported with MMTk"
-#endif
-
-#ifdef MEMPROFILE
-#error "MEMPROFILE is not supported with MMTk"
-#endif
-
-#ifdef OBJPROFILE
-#ifdef MMTK_GC
-#warning "OBJPROFILE is unsupported with MMTk; disabling"
-#undef OBJPROFILE
-#endif
-#endif
-
-
-#ifdef MMTK_GC
-#include "mmtk.h"
-
-typedef struct {
-    char c;
-} jl_gc_pagemeta_t;
-
-#else  // !MMTK_GC
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define jl_malloc_tag ((void*)0xdeadaa01)
-#define jl_singleton_tag ((void*)0xdeadaa02)
-
 // Used by GC_DEBUG_ENV
 typedef struct {
     uint64_t num;
@@ -257,9 +82,30 @@ typedef struct _jl_gc_chunk_t {
 
 #define GC_REMSET_PTR_TAG (0x1)             // lowest bit of `jl_value_t *` is tagged if it's in the remset
 
-// data structure for tracking malloc'd arrays and genericmemory.
+// layout for big (>2k) objects
 
+extern uintptr_t gc_bigval_sentinel_tag;
 
+JL_EXTENSION typedef struct _bigval_t {
+    struct _bigval_t *next;
+    struct _bigval_t *prev;
+    size_t sz;
+#ifdef _P64 // Add padding so that the value is 64-byte aligned
+    // (8 pointers of 8 bytes each) - (4 other pointers in struct)
+    void *_padding[8 - 4];
+#else
+    // (16 pointers of 4 bytes each) - (4 other pointers in struct)
+    void *_padding[16 - 4];
+#endif
+    //struct jl_taggedvalue_t <>;
+    union {
+        uintptr_t header;
+        struct {
+            uintptr_t gc:2;
+        } bits;
+    };
+    // must be 64-byte aligned here, in 32 & 64 bit modes
+} bigval_t;
 
 // pool page metadata
 typedef struct _jl_gc_pagemeta_t {
@@ -434,6 +280,13 @@ typedef struct {
     pagetable1_t *meta1[REGION2_PG_COUNT];
 } pagetable_t;
 
+typedef struct {
+    _Atomic(size_t) bytes_mapped;
+    _Atomic(size_t) bytes_resident;
+    _Atomic(size_t) heap_size;
+    _Atomic(size_t) heap_target;
+} gc_heapstatus_t;
+
 #define GC_PAGE_UNMAPPED        0
 #define GC_PAGE_ALLOCATED       1
 #define GC_PAGE_LAZILY_FREED    2
@@ -543,11 +396,7 @@ STATIC_INLINE unsigned ffs_u32(uint32_t bitvec)
 }
 #endif
 
-extern pagetable_t memory_map;
-extern bigval_t *big_objects_marked;
-extern arraylist_t finalizer_list_marked;
-extern arraylist_t to_finalize;
-extern jl_mutex_t finalizers_lock;
+extern bigval_t *oldest_generation_of_bigvals;
 extern int64_t buffered_pages;
 extern int gc_first_tid;
 extern gc_heapstatus_t gc_heap_stats;
@@ -579,10 +428,20 @@ STATIC_INLINE int gc_is_parallel_collector_thread(int tid) JL_NOTSAFEPOINT
     return tid >= gc_first_tid && tid <= gc_last_parallel_collector_thread_id();
 }
 
+STATIC_INLINE int gc_is_concurrent_collector_thread(int tid) JL_NOTSAFEPOINT
+{
+    if (jl_n_sweepthreads == 0) {
+        return 0;
+    }
+    int last_parallel_collector_thread_id = gc_last_parallel_collector_thread_id();
+    int concurrent_collector_thread_id = last_parallel_collector_thread_id + 1;
+    return tid == concurrent_collector_thread_id;
+}
+
 STATIC_INLINE int gc_random_parallel_collector_thread_id(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
     assert(jl_n_markthreads > 0);
-    int v = gc_first_tid + (int)cong(jl_n_markthreads - 1, &ptls->rngseed);
+    int v = gc_first_tid + (int)cong(jl_n_markthreads, &ptls->rngseed); // cong is [0, n)
     assert(v >= gc_first_tid && v <= gc_last_parallel_collector_thread_id());
     return v;
 }
@@ -615,40 +474,54 @@ STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) JL_NOTSAFEPOIN
     return (jl_taggedvalue_t*)(p->data + p->fl_end_offset);
 }
 
-STATIC_INLINE int gc_old(uintptr_t bits) JL_NOTSAFEPOINT
+FORCE_INLINE void gc_big_object_unlink(const bigval_t *node) JL_NOTSAFEPOINT
 {
-    return (bits & GC_OLD) != 0;
+    assert(node != oldest_generation_of_bigvals);
+    assert(node->header != gc_bigval_sentinel_tag);
+    assert(node->prev != NULL);
+    if (node->next != NULL) {
+        node->next->prev = node->prev;
+    }
+    node->prev->next = node->next;
 }
 
-STATIC_INLINE uintptr_t gc_set_bits(uintptr_t tag, int bits) JL_NOTSAFEPOINT
+FORCE_INLINE void gc_big_object_link(bigval_t *sentinel_node, bigval_t *node) JL_NOTSAFEPOINT
 {
-    return (tag & ~(uintptr_t)3) | bits;
-}
-
-NOINLINE uintptr_t gc_get_stack_ptr(void);
-
-STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) JL_NOTSAFEPOINT
-{
-    *hdr->prev = hdr->next;
-    if (hdr->next) {
-        hdr->next->prev = hdr->prev;
+    assert(sentinel_node != NULL);
+    assert(sentinel_node->header == gc_bigval_sentinel_tag);
+    assert(sentinel_node->prev == NULL);
+    assert(node->header != gc_bigval_sentinel_tag);
+    // a new node gets linked in at the head of the list
+    node->next = sentinel_node->next;
+    node->prev = sentinel_node;
+    if (sentinel_node->next != NULL) {
+        sentinel_node->next->prev = node;
     }
+    sentinel_node->next = node;
 }
 
-STATIC_INLINE void gc_big_object_link(bigval_t *hdr, bigval_t **list) JL_NOTSAFEPOINT
+// Must be kept in sync with `base/timing.jl`
+#define FULL_SWEEP_REASON_SWEEP_ALWAYS_FULL (0)
+#define FULL_SWEEP_REASON_FORCED_FULL_SWEEP (1)
+#define FULL_SWEEP_REASON_USER_MAX_EXCEEDED (2)
+#define FULL_SWEEP_REASON_LARGE_PROMOTION_RATE (3)
+#define FULL_SWEEP_NUM_REASONS (4)
+
+extern JL_DLLEXPORT uint64_t jl_full_sweep_reasons[FULL_SWEEP_NUM_REASONS];
+STATIC_INLINE void gc_count_full_sweep_reason(int reason) JL_NOTSAFEPOINT
 {
-    hdr->next = *list;
-    hdr->prev = list;
-    if (*list)
-        (*list)->prev = &hdr->next;
-    *list = hdr;
+    assert(reason >= 0 && reason < FULL_SWEEP_NUM_REASONS);
+    jl_full_sweep_reasons[reason]++;
 }
 
+extern uv_mutex_t gc_perm_lock;
 extern uv_mutex_t gc_threads_lock;
 extern uv_cond_t gc_threads_cond;
 extern uv_sem_t gc_sweep_assists_needed;
 extern _Atomic(int) gc_n_threads_marking;
 extern _Atomic(int) gc_n_threads_sweeping;
+extern _Atomic(int) n_threads_running;
+extern uv_barrier_t thread_init_done;
 void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
 void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
 void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
@@ -662,6 +535,7 @@ void jl_gc_debug_init(void);
 
 // GC pages
 
+extern uv_mutex_t gc_pages_lock;
 void jl_gc_init_page(void) JL_NOTSAFEPOINT;
 NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT;
 void jl_gc_free_page(jl_gc_pagemeta_t *p) JL_NOTSAFEPOINT;
@@ -852,6 +726,7 @@ void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect
 }
 #endif
 
-#endif // !MMTK_GC
-
 #endif
+
+
+#endif // !MMTK_GC
diff --git a/src/gc-tls-common.h b/src/gc-tls-common.h
new file mode 100644
index 0000000000000..ba36f5c1c238e
--- /dev/null
+++ b/src/gc-tls-common.h
@@ -0,0 +1,52 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Meant to be included in "julia_threads.h"
+#ifndef JL_GC_TLS_COMMON_H
+#define JL_GC_TLS_COMMON_H
+
+#include "julia_atomics.h"
+
+// GC threading ------------------------------------------------------------------
+
+#include "arraylist.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    // variable for tracking weak references
+    small_arraylist_t weak_refs;
+    // live tasks started on this thread
+    // that are holding onto a stack from the pool
+    small_arraylist_t live_tasks;
+
+    // variables for tracking malloc'd arrays
+    struct _mallocmemory_t *mallocarrays;
+    struct _mallocmemory_t *mafreelist;
+
+#define JL_N_STACK_POOLS 16
+    small_arraylist_t free_stacks[JL_N_STACK_POOLS];
+} jl_thread_heap_common_t;
+
+typedef struct {
+    _Atomic(int64_t) allocd;
+    _Atomic(int64_t) pool_live_bytes;
+    _Atomic(uint64_t) malloc;
+    _Atomic(uint64_t) realloc;
+    _Atomic(uint64_t) poolalloc;
+    _Atomic(uint64_t) bigalloc;
+    _Atomic(int64_t) free_acc;
+    _Atomic(uint64_t) alloc_acc;
+} jl_thread_gc_num_common_t;
+
+typedef struct {
+    jl_thread_heap_common_t heap;
+    jl_thread_gc_num_common_t gc_num;
+} jl_gc_tls_states_common_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // JL_GC_TLS_COMMON_H
diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h
new file mode 100644
index 0000000000000..7b1b249cd8ae3
--- /dev/null
+++ b/src/gc-tls-mmtk.h
@@ -0,0 +1,21 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifdef MMTK_GC
+
+#include <assert.h>
+#include "mmtkMutator.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    MMTkMutatorContext mmtk_mutator;
+    size_t malloc_sz_since_last_poll;
+} jl_gc_tls_states_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // MMTK_GC
diff --git a/src/gc-tls.h b/src/gc-tls.h
index 629ce53fbc886..d44b458842ae1 100644
--- a/src/gc-tls.h
+++ b/src/gc-tls.h
@@ -1,5 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#ifndef MMTK_GC
+
 // Meant to be included in "julia_threads.h"
 #ifndef JL_GC_TLS_H
 #define JL_GC_TLS_H
@@ -21,18 +23,8 @@ typedef struct {
 } jl_gc_pool_t;
 
 typedef struct {
-    // variable for tracking weak references
-    small_arraylist_t weak_refs;
-    // live tasks started on this thread
-    // that are holding onto a stack from the pool
-    small_arraylist_t live_tasks;
-
-    // variables for tracking malloc'd arrays
-    struct _mallocarray_t *mallocarrays;
-    struct _mallocarray_t *mafreelist;
-
-    // variables for tracking big objects
-    struct _bigval_t *big_objects;
+    // variable for tracking young (i.e. not in `GC_OLD_MARKED`/last generation) large objects
+    struct _bigval_t *young_generation_of_bigvals;
 
     // lower bound of the number of pointers inside remembered values
     int remset_nptr;
@@ -42,22 +34,8 @@ typedef struct {
     // variables for allocating objects from pools
 #define JL_GC_N_MAX_POOLS 51 // conservative. must be kept in sync with `src/julia_internal.h`
     jl_gc_pool_t norm_pools[JL_GC_N_MAX_POOLS];
-
-#define JL_N_STACK_POOLS 16
-    small_arraylist_t free_stacks[JL_N_STACK_POOLS];
 } jl_thread_heap_t;
 
-typedef struct {
-    _Atomic(int64_t) allocd;
-    _Atomic(int64_t) pool_live_bytes;
-    _Atomic(uint64_t) malloc;
-    _Atomic(uint64_t) realloc;
-    _Atomic(uint64_t) poolalloc;
-    _Atomic(uint64_t) bigalloc;
-    _Atomic(int64_t) free_acc;
-    _Atomic(uint64_t) alloc_acc;
-} jl_thread_gc_num_t;
-
 typedef struct {
     ws_queue_t chunk_queue;
     ws_queue_t ptr_queue;
@@ -69,17 +47,6 @@ typedef struct {
     size_t perm_scanned_bytes;
     // thread local increment of `scanned_bytes`
     size_t scanned_bytes;
-    // Number of queued big objects (<= 1024)
-    size_t nbig_obj;
-    // Array of queued big objects to be moved between the young list
-    // and the old list.
-    // A set low bit means that the object should be moved from the old list
-    // to the young list (`mark_reset_age`).
-    // Objects can only be put into this list when the mark bit is flipped to
-    // `1` (atomically). Combining with the sync after marking,
-    // this makes sure that a single objects can only appear once in
-    // the lists (the mark bit cannot be flipped to `0` without sweeping)
-    void *big_obj[1024];
 } jl_gc_mark_cache_t;
 
 typedef struct {
@@ -89,7 +56,6 @@ typedef struct {
 typedef struct {
     jl_thread_heap_t heap;
     jl_gc_page_stack_t page_metadata_allocd;
-    jl_thread_gc_num_t gc_num;
     jl_gc_markqueue_t mark_queue;
     jl_gc_mark_cache_t gc_cache;
     _Atomic(size_t) gc_sweeps_requested;
@@ -101,3 +67,5 @@ typedef struct {
 #endif
 
 #endif // JL_GC_TLS_H
+
+#endif // MMTK_GC
diff --git a/src/genericmemory.c b/src/genericmemory.c
index 6851e9131e534..ba5c46e79421d 100644
--- a/src/genericmemory.c
+++ b/src/genericmemory.c
@@ -54,7 +54,6 @@ jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t is
         tot = sizeof(jl_genericmemory_t) + sizeof(void*);
     }
     m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype);
-
     if (pooled) {
         data = (char*)m + JL_SMALL_BYTE_ALIGNMENT;
     }
@@ -271,7 +270,7 @@ JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destda
         _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata;
         _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata;
         jl_value_t *owner = jl_genericmemory_owner(dest);
-        mmtk_gc_wb(owner, NULL);
+        jl_gc_wb(owner, NULL); // FIXME: needs to be added here since the check below doesn't apply to MMTk
         if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
             jl_value_t *src_owner = jl_genericmemory_owner(src);
             ssize_t done = 0;
diff --git a/src/gf.c b/src/gf.c
index 659261d434659..fc2e62ebff96b 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -700,40 +700,38 @@ int foreach_mtable_in_module(
         if ((void*)b == jl_nothing)
             break;
         jl_sym_t *name = b->globalref->name;
-        if (jl_atomic_load_relaxed(&b->owner) == b && b->constp) {
-            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-            if (v) {
-                jl_value_t *uw = jl_unwrap_unionall(v);
-                if (jl_is_datatype(uw)) {
-                    jl_typename_t *tn = ((jl_datatype_t*)uw)->name;
-                    if (tn->module == m && tn->name == name && tn->wrapper == v) {
-                        // this is the original/primary binding for the type (name/wrapper)
-                        jl_methtable_t *mt = tn->mt;
-                        if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) {
-                            assert(mt->module == m);
-                            if (!visit(mt, env))
-                                return 0;
-                        }
-                    }
-                }
-                else if (jl_is_module(v)) {
-                    jl_module_t *child = (jl_module_t*)v;
-                    if (child != m && child->parent == m && child->name == name) {
-                        // this is the original/primary binding for the submodule
-                        if (!foreach_mtable_in_module(child, visit, env))
-                            return 0;
-                    }
-                }
-                else if (jl_is_mtable(v)) {
-                    jl_methtable_t *mt = (jl_methtable_t*)v;
-                    if (mt->module == m && mt->name == name) {
-                        // this is probably an external method table here, so let's
-                        // assume so as there is no way to precisely distinguish them
+        jl_value_t *v = jl_get_binding_value_if_const(b);
+        if (v) {
+            jl_value_t *uw = jl_unwrap_unionall(v);
+            if (jl_is_datatype(uw)) {
+                jl_typename_t *tn = ((jl_datatype_t*)uw)->name;
+                if (tn->module == m && tn->name == name && tn->wrapper == v) {
+                    // this is the original/primary binding for the type (name/wrapper)
+                    jl_methtable_t *mt = tn->mt;
+                    if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) {
+                        assert(mt->module == m);
                         if (!visit(mt, env))
                             return 0;
                     }
                 }
             }
+            else if (jl_is_module(v)) {
+                jl_module_t *child = (jl_module_t*)v;
+                if (child != m && child->parent == m && child->name == name) {
+                    // this is the original/primary binding for the submodule
+                    if (!foreach_mtable_in_module(child, visit, env))
+                        return 0;
+                }
+            }
+            else if (jl_is_mtable(v)) {
+                jl_methtable_t *mt = (jl_methtable_t*)v;
+                if (mt->module == m && mt->name == name) {
+                    // this is probably an external method table here, so let's
+                    // assume so as there is no way to precisely distinguish them
+                    if (!visit(mt, env))
+                        return 0;
+                }
+            }
         }
         table = jl_atomic_load_relaxed(&m->bindings);
     }
@@ -1362,8 +1360,7 @@ static inline jl_typemap_entry_t *lookup_leafcache(jl_genericmemory_t *leafcache
     }
     return NULL;
 }
-
-static jl_method_instance_t *cache_method(
+jl_method_instance_t *cache_method(
         jl_methtable_t *mt, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT,
         jl_tupletype_t *tt, // the original tupletype of the signature
         jl_method_t *definition,
@@ -1709,7 +1706,7 @@ static void method_overwrite(jl_typemap_entry_t *newentry, jl_method_t *oldvalue
         jl_printf(s, ".\n");
         jl_uv_flush(s);
     }
-    if (jl_generating_output()) {
+    if (jl_generating_output() && jl_options.incremental) {
         jl_printf(JL_STDERR, "ERROR: Method overwriting is not permitted during Module precompilation. Use `__precompile__(false)` to opt-out of precompilation.\n");
         jl_throw(jl_precompilable_error);
     }
@@ -2337,7 +2334,7 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
     JL_GC_POP();
 }
 
-static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args, size_t world)
+static void JL_NORETURN jl_method_error_bare(jl_value_t *f, jl_value_t *args, size_t world)
 {
     if (jl_methoderror_type) {
         jl_value_t *e = jl_new_struct_uninit(jl_methoderror_type);
@@ -2362,7 +2359,7 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
     // not reached
 }
 
-void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world)
+void JL_NORETURN jl_method_error(jl_value_t *f, jl_value_t **args, size_t na, size_t world)
 {
     jl_value_t *argtup = jl_f_tuple(NULL, args, na - 1);
     JL_GC_PUSH1(&argtup);
@@ -2413,7 +2410,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t na
 // spvals is any matched static parameter values, m is the Method,
 // full is a boolean indicating if that method fully covers the input
 //
-// lim is the max # of methods to return. if there are more, returns jl_false.
+// lim is the max # of methods to return. if there are more, returns jl_nothing.
 // Negative values stand for no limit.
 // Unless lim == -1, remove matches that are unambiguously covered by earlier ones
 JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
@@ -2433,7 +2430,7 @@ JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *
     return ml_matches((jl_methtable_t*)mt, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
 }
 
-jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
+JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
 {
     // one unspecialized version of a function can be shared among all cached specializations
     if (!jl_is_method(def) || def->source == NULL) {
@@ -2516,12 +2513,32 @@ jl_code_instance_t *jl_method_inferred_with_abi(jl_method_instance_t *mi JL_PROP
 
 jl_mutex_t precomp_statement_out_lock;
 
-static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time)
+_Atomic(uint8_t) jl_force_trace_compile_timing_enabled = 0;
+
+/**
+ * @brief Enable force trace compile to stderr with timing.
+ */
+JL_DLLEXPORT void jl_force_trace_compile_timing_enable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_compile`.
+    jl_atomic_fetch_add(&jl_force_trace_compile_timing_enabled, 1);
+}
+/**
+ * @brief Disable force trace compile to stderr with timing.
+ */
+JL_DLLEXPORT void jl_force_trace_compile_timing_disable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_compile`.
+    jl_atomic_fetch_add(&jl_force_trace_compile_timing_enabled, -1);
+}
+
+static void record_precompile_statement(jl_method_instance_t *mi, double compilation_time, int is_recompile)
 {
     static ios_t f_precompile;
     static JL_STREAM* s_precompile = NULL;
     jl_method_t *def = mi->def.method;
-    if (jl_options.trace_compile == NULL)
+    uint8_t force_trace_compile = jl_atomic_load_relaxed(&jl_force_trace_compile_timing_enabled);
+    if (force_trace_compile == 0 && jl_options.trace_compile == NULL)
         return;
     if (!jl_is_method(def))
         return;
@@ -2531,7 +2548,7 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila
     JL_LOCK(&precomp_statement_out_lock);
     if (s_precompile == NULL) {
         const char *t = jl_options.trace_compile;
-        if (!strncmp(t, "stderr", 6)) {
+        if (force_trace_compile || !strncmp(t, "stderr", 6)) {
             s_precompile = JL_STDERR;
         }
         else {
@@ -2541,17 +2558,80 @@ static void record_precompile_statement(jl_method_instance_t *mi, double compila
         }
     }
     if (!jl_has_free_typevars(mi->specTypes)) {
-        if (jl_options.trace_compile_timing)
+        if (is_recompile && s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF)
+            jl_printf(s_precompile, "\e[33m");
+        if (force_trace_compile || jl_options.trace_compile_timing)
             jl_printf(s_precompile, "#= %6.1f ms =# ", compilation_time / 1e6);
         jl_printf(s_precompile, "precompile(");
         jl_static_show(s_precompile, mi->specTypes);
-        jl_printf(s_precompile, ")\n");
+        jl_printf(s_precompile, ")");
+        if (is_recompile) {
+            if (s_precompile == JL_STDERR && jl_options.color != JL_OPTIONS_COLOR_OFF) {
+                jl_printf(s_precompile, "\e[0m");
+            }
+            else {
+                jl_printf(s_precompile, " # recompile");
+            }
+        }
+        jl_printf(s_precompile, "\n");
         if (s_precompile != JL_STDERR)
             ios_flush(&f_precompile);
     }
     JL_UNLOCK(&precomp_statement_out_lock);
 }
 
+jl_mutex_t dispatch_statement_out_lock;
+
+_Atomic(uint8_t) jl_force_trace_dispatch_enabled = 0;
+
+/**
+ * @brief Enable force trace dispatch to stderr.
+ */
+JL_DLLEXPORT void jl_force_trace_dispatch_enable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_dispatch`.
+    jl_atomic_fetch_add(&jl_force_trace_dispatch_enabled, 1);
+}
+/**
+ * @brief Disable force trace dispatch to stderr.
+ */
+JL_DLLEXPORT void jl_force_trace_dispatch_disable(void)
+{
+    // Increment the flag to allow reentrant callers to `@trace_dispatch`.
+    jl_atomic_fetch_add(&jl_force_trace_dispatch_enabled, -1);
+}
+
+static void record_dispatch_statement(jl_method_instance_t *mi)
+{
+    static ios_t f_dispatch;
+    static JL_STREAM* s_dispatch = NULL;
+    jl_method_t *def = mi->def.method;
+    if (!jl_is_method(def))
+        return;
+
+    uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled);
+    JL_LOCK(&dispatch_statement_out_lock);
+    if (s_dispatch == NULL) {
+        const char *t = jl_options.trace_dispatch;
+        if (force_trace_dispatch || !strncmp(t, "stderr", 6)) {
+            s_dispatch = JL_STDERR;
+        }
+        else {
+            if (ios_file(&f_dispatch, t, 1, 1, 1, 1) == NULL)
+                jl_errorf("cannot open dispatch statement file \"%s\" for writing", t);
+            s_dispatch = (JL_STREAM*) &f_dispatch;
+        }
+    }
+    if (!jl_has_free_typevars(mi->specTypes)) {
+        jl_printf(s_dispatch, "precompile(");
+        jl_static_show(s_dispatch, mi->specTypes);
+        jl_printf(s_dispatch, ")\n");
+        if (s_dispatch != JL_STDERR)
+            ios_flush(&f_dispatch);
+    }
+    JL_UNLOCK(&dispatch_statement_out_lock);
+}
+
 // If waitcompile is 0, this will return NULL if compiling is on-going in the JIT. This is
 // useful for the JIT itself, since it just doesn't cause redundant work or missed updates,
 // but merely causes it to look into the current JIT worklist.
@@ -2676,7 +2756,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                     // unspec is probably not specsig, but might be using specptr
                     jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & ~0b1); // clear specsig flag
                     jl_mi_cache_insert(mi, codeinst);
-                    record_precompile_statement(mi, 0);
+                    record_precompile_statement(mi, 0, 0);
                     return codeinst;
                 }
             }
@@ -2693,7 +2773,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
                 0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL);
             jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
             jl_mi_cache_insert(mi, codeinst);
-            record_precompile_statement(mi, 0);
+            record_precompile_statement(mi, 0, 0);
             return codeinst;
         }
         if (compile_option == JL_OPTIONS_COMPILE_OFF) {
@@ -2742,7 +2822,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             codeinst = NULL;
         }
         else if (did_compile && codeinst->owner == jl_nothing) {
-            record_precompile_statement(mi, compile_time);
+            record_precompile_statement(mi, compile_time, is_recompile);
         }
         JL_GC_POP();
     }
@@ -2901,7 +2981,7 @@ jl_method_instance_t *jl_normalize_to_compilable_mi(jl_method_instance_t *mi JL_
 }
 
 // return a MethodInstance for a compileable method_match
-jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache)
+JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache)
 {
     jl_method_t *m = match->method;
     jl_svec_t *env = match->sparams;
@@ -3059,7 +3139,8 @@ static void jl_compile_now(jl_method_instance_t *mi)
 JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world)
 {
     size_t tworld = jl_typeinf_world;
-    jl_atomic_store_relaxed(&mi->precompiled, 1);
+    uint8_t miflags = jl_atomic_load_relaxed(&mi->flags) | JL_MI_FLAGS_MASK_PRECOMPILED;
+    jl_atomic_store_relaxed(&mi->flags, miflags);
     if (jl_generating_output()) {
         jl_compile_now(mi);
         // In addition to full compilation of the compilation-signature, if `types` is more specific (e.g. due to nospecialize),
@@ -3074,7 +3155,8 @@ JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tuplet
             types2 = jl_type_intersection_env((jl_value_t*)types, (jl_value_t*)mi->def.method->sig, &tpenv2);
             jl_method_instance_t *mi2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2);
             JL_GC_POP();
-            jl_atomic_store_relaxed(&mi2->precompiled, 1);
+            miflags = jl_atomic_load_relaxed(&mi2->flags) | JL_MI_FLAGS_MASK_PRECOMPILED;
+            jl_atomic_store_relaxed(&mi2->flags, miflags);
             if (jl_rettype_inferred_native(mi2, world, world) == jl_nothing)
                 (void)jl_type_infer(mi2, world, SOURCE_MODE_NOT_REQUIRED);
             if (jl_typeinf_func && jl_atomic_load_relaxed(&mi->def.method->primary_world) <= tworld) {
@@ -3103,6 +3185,21 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
     return 1;
 }
 
+JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types)
+{
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    size_t min_valid = 0;
+    size_t max_valid = ~(size_t)0;
+    jl_method_instance_t *mi = jl_get_compile_hint_specialization(types, world, &min_valid, &max_valid, 1);
+    if (mi == NULL)
+        return 0;
+    JL_GC_PROMISE_ROOTED(mi);
+    if (jl_generating_output() && jl_options.trim) {
+        arraylist_push(jl_entrypoint_mis, mi);
+    }
+    return 1;
+}
+
 // add type of `f` to front of argument tuple type
 jl_value_t *jl_argtype_with_function(jl_value_t *f, jl_value_t *types0)
 {
@@ -3335,6 +3432,17 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
             jl_method_error(F, args, nargs, world);
             // unreachable
         }
+        // mfunc is about to be dispatched
+        uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled);
+        if (force_trace_dispatch || jl_options.trace_dispatch != NULL) {
+            uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags);
+            uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED;
+            if (!was_dispatched) {
+                miflags |= JL_MI_FLAGS_MASK_DISPATCHED;
+                jl_atomic_store_relaxed(&mfunc->flags, miflags);
+                record_dispatch_statement(mfunc);
+            }
+        }
     }
 
 #ifdef JL_TRACE
@@ -3457,6 +3565,16 @@ jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value
             jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
     }
     JL_GC_PROMISE_ROOTED(mfunc);
+    uint8_t force_trace_dispatch = jl_atomic_load_relaxed(&jl_force_trace_dispatch_enabled);
+    if (force_trace_dispatch || jl_options.trace_dispatch != NULL) {
+        uint8_t miflags = jl_atomic_load_relaxed(&mfunc->flags);
+        uint8_t was_dispatched = miflags & JL_MI_FLAGS_MASK_DISPATCHED;
+        if (!was_dispatched) {
+            miflags |= JL_MI_FLAGS_MASK_DISPATCHED;
+            jl_atomic_store_relaxed(&mfunc->flags, miflags);
+            record_dispatch_statement(mfunc);
+        }
+    }
     size_t world = jl_current_task->world_age;
     return _jl_invoke(gf, args, nargs - 1, mfunc, world);
 }
@@ -3669,7 +3787,7 @@ static int sort_mlmatches(jl_array_t *t, size_t idx, arraylist_t *visited, array
         int msp2 = !msp && jl_method_morespecific(m2, m);
         if (!msp) {
             if (subt || !include_ambiguous || (lim != -1 && msp2)) {
-                if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
+                if (subt2 || ((lim != -1 || (!include_ambiguous && !msp2)) && jl_subtype((jl_value_t*)ti, m2->sig))) {
                     // this may be filtered out as fully intersected, if applicable later
                     mayexclude = 1;
                 }
diff --git a/src/init.c b/src/init.c
index 1de781b150ddd..413d4e8055e54 100644
--- a/src/init.c
+++ b/src/init.c
@@ -44,6 +44,7 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
 
 // list of modules being deserialized with __init__ methods
 jl_array_t *jl_module_init_order;
+arraylist_t *jl_entrypoint_mis;
 
 JL_DLLEXPORT size_t jl_page_size;
 
@@ -64,15 +65,20 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
     // threads since it seems to return bogus values for master thread on Linux
     // and possibly OSX.
     if (!ismaster) {
-#  if defined(_OS_LINUX_)
+#  if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
         pthread_attr_t attr;
+#if defined(_OS_FREEBSD_)
+        pthread_attr_init(&attr);
+        pthread_attr_get_np(pthread_self(), &attr);
+#else
         pthread_getattr_np(pthread_self(), &attr);
+#endif
         void *stackaddr;
         size_t stacksize;
         pthread_attr_getstack(&attr, &stackaddr, &stacksize);
         pthread_attr_destroy(&attr);
-        *stack_hi = stackaddr;
-        *stack_lo = (char*)stackaddr - stacksize;
+        *stack_lo = stackaddr;
+        *stack_hi = (char*)stackaddr + stacksize;
         return;
 #  elif defined(_OS_DARWIN_)
         extern void *pthread_get_stackaddr_np(pthread_t thread);
@@ -80,19 +86,8 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
         pthread_t thread = pthread_self();
         void *stackaddr = pthread_get_stackaddr_np(thread);
         size_t stacksize = pthread_get_stacksize_np(thread);
-        *stack_hi = stackaddr;
         *stack_lo = (char*)stackaddr - stacksize;
-        return;
-#  elif defined(_OS_FREEBSD_)
-        pthread_attr_t attr;
-        pthread_attr_init(&attr);
-        pthread_attr_get_np(pthread_self(), &attr);
-        void *stackaddr;
-        size_t stacksize;
-        pthread_attr_getstack(&attr, &stackaddr, &stacksize);
-        pthread_attr_destroy(&attr);
         *stack_hi = stackaddr;
-        *stack_lo = (char*)stackaddr - stacksize;
         return;
 #  else
 #      warning "Getting precise stack size for thread is not supported."
@@ -291,7 +286,8 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode) JL_NOTSAFEPOINT_ENTER
     JL_STDOUT = (uv_stream_t*) STDOUT_FILENO;
     JL_STDERR = (uv_stream_t*) STDERR_FILENO;
 
-    jl_gc_run_all_finalizers(ct);
+    if (ct)
+        jl_gc_run_all_finalizers(ct);
 
     uv_loop_t *loop = jl_global_event_loop();
     if (loop != NULL) {
@@ -726,6 +722,7 @@ static void restore_fp_env(void)
 static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct);
 
 JL_DLLEXPORT int jl_default_debug_info_kind;
+JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
 
 static void init_global_mutexes(void) {
     JL_MUTEX_INIT(&jl_modules_mutex, "jl_modules_mutex");
@@ -745,6 +742,9 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     // Make sure we finalize the tls callback before starting any threads.
     (void)jl_get_pgcstack();
 
+    // initialize symbol-table lock
+    uv_mutex_init(&symtab_lock);
+
     // initialize backtraces
     jl_init_profile_lock();
 #ifdef _OS_WINDOWS_
@@ -827,7 +827,6 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
     arraylist_push(&eytzinger_image_tree, (void*)1); // outside image
 
     jl_ptls_t ptls = jl_init_threadtls(0);
-
 #pragma GCC diagnostic push
 #if defined(_COMPILER_GCC_) && __GNUC__ >= 12
 #pragma GCC diagnostic ignored "-Wdangling-pointer"
@@ -844,8 +843,10 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
     JL_TIMING(JULIA_INIT, JULIA_INIT);
     jl_resolve_sysimg_location(rel);
     // loads sysimg if available, and conditionally sets jl_options.cpu_target
-    if (rel == JL_IMAGE_IN_MEMORY)
+    if (rel == JL_IMAGE_IN_MEMORY) {
         jl_set_sysimg_so(jl_exe_handle);
+        jl_options.image_file = jl_options.julia_bin;
+    }
     else if (jl_options.image_file)
         jl_preload_sysimg_so(jl_options.image_file);
     if (jl_options.cpu_target == NULL)
@@ -879,16 +880,15 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
         jl_n_markthreads = 0;
         jl_n_sweepthreads = 0;
         jl_n_gcthreads = 0;
-        jl_n_threads_per_pool[0] = 1;
-        jl_n_threads_per_pool[1] = 0;
+        jl_n_threads_per_pool[0] = 0; // Interactive threadpool
+        jl_n_threads_per_pool[1] = 1; // Default threadpool
     } else {
         post_image_load_hooks();
     }
     jl_start_threads();
+    jl_start_gc_threads();
+    uv_barrier_wait(&thread_init_done);
 
-#ifdef MMTK_GC
-    mmtk_initialize_collection((void *)ptls);
-#endif
     jl_gc_enable(1);
 
     if (jl_options.image_file && (!jl_generating_output() || jl_options.incremental) && jl_module_init_order) {
@@ -903,6 +903,11 @@ static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_
         JL_GC_POP();
     }
 
+    if (jl_options.trim) {
+        jl_entrypoint_mis = (arraylist_t *)malloc_s(sizeof(arraylist_t));
+        arraylist_new(jl_entrypoint_mis, 0);
+    }
+
     if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON)
         jl_install_sigint_handler();
 }
diff --git a/src/interpreter.c b/src/interpreter.c
index 4efcf2bdeef89..a8d371eebb2ce 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -94,9 +94,7 @@ static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
             jl_error("method: invalid declaration");
         }
         jl_binding_t *b = jl_get_binding_for_method_def(modu, fname);
-        _Atomic(jl_value_t*) *bp = &b->value;
-        jl_value_t *gf = jl_generic_function_def(fname, modu, bp, b);
-        return gf;
+        return jl_declare_const_gf(b, modu, fname);
     }
 
     jl_value_t *atypes = NULL, *meth = NULL, *fname = NULL;
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index ad89abf6ca1a2..09916297e16ff 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -405,10 +405,11 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
     }
     else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
         assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
-        AllocaInst *cast = ctx.builder.CreateAlloca(ty);
+        Align align = std::max(DL.getPrefTypeAlign(ty), DL.getPrefTypeAlign(to));
+        AllocaInst *cast = emit_static_alloca(ctx, ty, align);
         setName(ctx.emission_context, cast, "coercion");
-        ctx.builder.CreateStore(unboxed, cast);
-        unboxed = ctx.builder.CreateLoad(to, cast);
+        ctx.builder.CreateAlignedStore(unboxed, cast, align);
+        unboxed = ctx.builder.CreateAlignedLoad(to, cast, align);
     }
     else if (frompointer) {
         Type *INTT_to = INTT(to, DL);
@@ -440,14 +441,14 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
         // up being dead code, and type inference knows that the other
         // branch's type is the only one that matters.
         if (type_is_ghost(to)) {
-            return NULL;
+            return nullptr;
         }
         CreateTrap(ctx.builder);
         return UndefValue::get(to); // type mismatch error
     }
 
-    Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : NULL;
-    if (!x.ispointer() || c) { // already unboxed, but sometimes need conversion
+    Constant *c = x.constant ? julia_const_to_llvm(ctx, x.constant) : nullptr;
+    if ((x.inline_roots.empty() && !x.ispointer()) || c != nullptr) { // already unboxed, but sometimes need conversion
         Value *unboxed = c ? c : x.V;
         return emit_unboxed_coercion(ctx, to, unboxed);
     }
@@ -472,33 +473,22 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
     }
 
     unsigned alignment = julia_alignment(jt);
-    Type *ptype = to->getPointerTo();
-    if (p->getType() != ptype && isa<AllocaInst>(p)) {
-        // LLVM's mem2reg can't handle coercion if the load/store type does
-        // not match the type of the alloca. As such, it is better to
-        // perform the load using the alloca's type and then perform the
-        // appropriate coercion manually.
-        AllocaInst *AI = cast<AllocaInst>(p);
-        Type *AllocType = AI->getAllocatedType();
-        const DataLayout &DL = jl_Module->getDataLayout();
-        if (!AI->isArrayAllocation() &&
-                (AllocType->isFloatingPointTy() || AllocType->isIntegerTy() || AllocType->isPointerTy()) &&
-                (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) &&
-                DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) {
-            Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment));
-            setName(ctx.emission_context, load, p->getName() + ".unbox");
-            jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
-            return emit_unboxed_coercion(ctx, to, ai.decorateInst(load));
-        }
+    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    if (!x.inline_roots.empty()) {
+        assert(x.typ == jt);
+        AllocaInst *combined = emit_static_alloca(ctx, to, Align(alignment));
+        auto combined_ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
+        recombine_value(ctx, x, combined, combined_ai, Align(alignment), false);
+        p = combined;
+        ai = combined_ai;
     }
     Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment));
     setName(ctx.emission_context, load, p->getName() + ".unbox");
-    jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
     return ai.decorateInst(load);
 }
 
 // emit code to store a raw value into a destination
-static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, unsigned alignment, bool isVolatile)
+static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest, MDNode *tbaa_dest, Align alignment, bool isVolatile)
 {
     if (x.isghost) {
         // this can happen when a branch yielding a different type ends
@@ -507,18 +497,25 @@ static void emit_unbox_store(jl_codectx_t &ctx, const jl_cgval_t &x, Value *dest
         return;
     }
 
+    auto dest_ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest);
+
+    if (!x.inline_roots.empty()) {
+        recombine_value(ctx, x, dest, dest_ai, alignment, isVolatile);
+        return;
+    }
+
     if (!x.ispointer()) { // already unboxed, but sometimes need conversion (e.g. f32 -> i32)
         assert(x.V);
         Value *unboxed = zext_struct(ctx, x.V);
-        StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, Align(alignment));
+        StoreInst *store = ctx.builder.CreateAlignedStore(unboxed, dest, alignment);
         store->setVolatile(isVolatile);
-        jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest);
-        ai.decorateInst(store);
+        dest_ai.decorateInst(store);
         return;
     }
 
     Value *src = data_pointer(ctx, x);
-    emit_memcpy(ctx, dest, jl_aliasinfo_t::fromTBAA(ctx, tbaa_dest), src, jl_aliasinfo_t::fromTBAA(ctx, x.tbaa), jl_datatype_size(x.typ), alignment, julia_alignment(x.typ), isVolatile);
+    auto src_ai = jl_aliasinfo_t::fromTBAA(ctx, x.tbaa);
+    emit_memcpy(ctx, dest, dest_ai, src, src_ai, jl_datatype_size(x.typ), Align(alignment), Align(julia_alignment(x.typ)), isVolatile);
 }
 
 static jl_datatype_t *staticeval_bitstype(const jl_cgval_t &targ)
@@ -692,10 +689,11 @@ static jl_cgval_t generic_cast(
             // understood that everything is implicitly rounded to 23 bits,
             // but if we start looking at more bits we need to actually do the
             // rounding first instead of carrying around incorrect low bits.
-            Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType());
+            Align align(julia_alignment((jl_value_t*)jlto));
+            Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType(), align);
             setName(ctx.emission_context, jlfloattemp_var, "rounding_slot");
-            ctx.builder.CreateStore(from, jlfloattemp_var);
-            from  = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true);
+            ctx.builder.CreateAlignedStore(from, jlfloattemp_var, align);
+            from = ctx.builder.CreateAlignedLoad(from->getType(), jlfloattemp_var, align, /*force this to load from the stack*/true);
             setName(ctx.emission_context, from, "rounded");
         }
     }
@@ -767,10 +765,10 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         setName(ctx.emission_context, im1, "pointerref_offset");
         Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
-        thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1);
+        thePtr = emit_ptrgep(ctx, thePtr, im1);
         setName(ctx.emission_context, thePtr, "pointerref_src");
         MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
-        emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, sizeof(jl_value_t*), align_nb);
+        emit_memcpy(ctx, strct, jl_aliasinfo_t::fromTBAA(ctx, tbaa), thePtr, jl_aliasinfo_t::fromTBAA(ctx, nullptr), size, Align(sizeof(jl_value_t*)), Align(align_nb));
         return mark_julia_type(ctx, strct, true, ety);
     }
     else {
@@ -799,7 +797,7 @@ static jl_cgval_t emit_runtime_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t
 static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
 {
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = argv[1];
+    jl_cgval_t x = argv[1];
     const jl_cgval_t &i = argv[2];
     const jl_cgval_t &align = argv[3];
 
@@ -822,15 +820,17 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         return jl_cgval_t();
     }
     emit_typecheck(ctx, x, ety, "pointerset");
+    x = update_julia_type(ctx, x, ety);
+    if (x.typ == jl_bottom_type)
+        return jl_cgval_t();
 
     Value *idx = emit_unbox(ctx, ctx.types().T_size, i, (jl_value_t*)jl_long_type);
     Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(ctx.types().T_size, 1));
     setName(ctx.emission_context, im1, "pointerset_idx");
 
-    Value *thePtr;
+    Value *thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
     if (ety == (jl_value_t*)jl_any_type) {
         // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
-        thePtr = emit_unbox(ctx, ctx.types().T_size->getPointerTo(), e, e.typ);
         auto gep = ctx.builder.CreateInBoundsGEP(ctx.types().T_size, thePtr, im1);
         setName(ctx.emission_context, gep, "pointerset_ptr");
         auto val = ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), ctx.types().T_size);
@@ -839,22 +839,23 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, ArrayRef<jl_cgval_t> argv)
         jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_data);
         ai.decorateInst(store);
     }
+    else if (!x.inline_roots.empty()) {
+        recombine_value(ctx, e, thePtr, jl_aliasinfo_t(), Align(align_nb), false);
+    }
     else if (x.ispointer()) {
-        thePtr = emit_unbox(ctx, getPointerTy(ctx.builder.getContext()), e, e.typ);
         uint64_t size = jl_datatype_size(ety);
         im1 = ctx.builder.CreateMul(im1, ConstantInt::get(ctx.types().T_size,
                     LLT_ALIGN(size, jl_datatype_align(ety))));
         setName(ctx.emission_context, im1, "pointerset_offset");
-        auto gep = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1);
+        auto gep = emit_ptrgep(ctx, thePtr, im1);
         setName(ctx.emission_context, gep, "pointerset_ptr");
-        emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, align_nb, julia_alignment(ety));
+        emit_memcpy(ctx, gep, jl_aliasinfo_t::fromTBAA(ctx, nullptr), x, size, Align(align_nb), Align(julia_alignment(ety)));
     }
     else {
         bool isboxed;
         Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
-            thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
             thePtr = ctx.builder.CreateInBoundsGEP(ptrty, thePtr, im1);
             typed_store(ctx, thePtr, x, jl_cgval_t(), ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
                         AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, nullptr, true, false, false, false, false, false, nullptr, "atomic_pointerset", nullptr, nullptr);
@@ -992,7 +993,7 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, ArrayRef
     bool ismodifyfield = f == atomic_pointermodify;
     const jl_cgval_t undefval;
     const jl_cgval_t &e = argv[0];
-    const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
+    jl_cgval_t x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
     const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval;
     const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2];
     const jl_cgval_t &failord = isreplacefield ? argv[4] : undefval;
@@ -1034,8 +1035,12 @@ static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, ArrayRef
         emit_error(ctx, msg);
         return jl_cgval_t();
     }
-    if (!ismodifyfield)
+    if (!ismodifyfield) {
         emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
+        x = update_julia_type(ctx, x, ety);
+        if (x.typ == jl_bottom_type)
+            return jl_cgval_t();
+    }
 
     size_t nb = jl_datatype_size(ety);
     if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 6c40f4230ab69..d1757cadee05c 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -3,7 +3,7 @@
 #include "llvm-version.h"
 #include "platform.h"
 #include <stdint.h>
-#include <sstream>
+#include <string>
 
 #include "llvm/IR/Mangler.h"
 #include <llvm/ADT/Statistic.h>
@@ -14,6 +14,15 @@
 #include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
 #include <llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h>
 #include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.h>
+#if JL_LLVM_VERSION >= 180000
+#include <llvm/ExecutionEngine/Orc/Debugging/DebugInfoSupport.h>
+#include <llvm/ExecutionEngine/Orc/Debugging/PerfSupportPlugin.h>
+#include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderPerf.h>
+#endif
+#if JL_LLVM_VERSION >= 190000
+#include <llvm/ExecutionEngine/Orc/Debugging/VTuneSupportPlugin.h>
+#include <llvm/ExecutionEngine/Orc/TargetProcess/JITLoaderVTune.h>
+#endif
 #include <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h>
 #include <llvm/IR/Verifier.h>
 #include <llvm/Support/DynamicLibrary.h>
@@ -42,7 +51,11 @@ using namespace llvm;
 #include "julia_assert.h"
 #include "processor.h"
 
+#if JL_LLVM_VERSION >= 180000
+# include <llvm/ExecutionEngine/Orc/Debugging/DebuggerSupportPlugin.h>
+#else
 # include <llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h>
+#endif
 # include <llvm/ExecutionEngine/JITLink/EHFrameSupport.h>
 # include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
 # include <llvm/ExecutionEngine/Orc/MapperJITLinkMemoryManager.h>
@@ -138,13 +151,14 @@ void jl_dump_llvm_opt_impl(void *s)
     **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (ios_t*)s;
 }
 
+#ifndef JL_USE_JITLINK
 static int jl_add_to_ee(
         orc::ThreadSafeModule &M,
         const StringMap<orc::ThreadSafeModule*> &NewExports,
         DenseMap<orc::ThreadSafeModule*, int> &Queued,
         SmallVectorImpl<orc::ThreadSafeModule*> &Stack) JL_NOTSAFEPOINT;
+#endif
 static void jl_decorate_module(Module &M) JL_NOTSAFEPOINT;
-static uint64_t getAddressForFunction(StringRef fname) JL_NOTSAFEPOINT;
 
 void jl_link_global(GlobalVariable *GV, void *addr) JL_NOTSAFEPOINT
 {
@@ -173,23 +187,6 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals) JL_NOTSAFEPOINT
     }
 }
 
-// used for image_codegen, where we keep all the gvs external
-// so we can't jit them directly into each module
-static orc::ThreadSafeModule jl_get_globals_module(orc::ThreadSafeContext &ctx, const DataLayout &DL, const Triple &T, std::map<void *, GlobalVariable*> &globals) JL_NOTSAFEPOINT
-{
-    auto lock = ctx.getLock();
-    auto GTSM = jl_create_ts_module("globals", ctx, DL, T);
-    auto GM = GTSM.getModuleUnlocked();
-    for (auto &global : globals) {
-        auto GV = global.second;
-        auto GV2 = new GlobalVariable(*GM, GV->getValueType(), GV->isConstant(), GlobalValue::ExternalLinkage, literal_static_pointer_val(global.first, GV->getValueType()), GV->getName(), nullptr, GV->getThreadLocalMode(), GV->getAddressSpace(), false);
-        GV2->copyAttributesFrom(GV);
-        GV2->setDSOLocal(true);
-        GV2->setAlignment(GV->getAlign());
-    }
-    return GTSM;
-}
-
 // this generates llvm code for the lambda info
 // and adds the result to the jitlayers
 // (and the shadow module),
@@ -234,8 +231,21 @@ static jl_callptr_t _jl_compile_codeinst(
         // to ensure that the globals are defined when they are compiled.
         if (params.imaging_mode) {
             // Won't contain any PLT/dlsym calls, so no need to optimize those
-            jl_ExecutionEngine->addModule(jl_get_globals_module(params.tsctx, params.DL, params.TargetTriple, params.global_targets));
-        } else {
+            if (!params.global_targets.empty()) {
+                void **globalslots = new void*[params.global_targets.size()];
+                void **slot = globalslots;
+                for (auto &global : params.global_targets) {
+                    auto GV = global.second;
+                    *slot = global.first;
+                    jl_ExecutionEngine->addGlobalMapping(GV->getName(), (uintptr_t)slot);
+                    slot++;
+                }
+#ifdef __clang_analyzer__
+                static void **leaker = globalslots; // for the purpose of the analyzer, we need to expressly leak this variable or it thinks we forgot to free it
+#endif
+            }
+        }
+        else {
             StringMap<void*> NewGlobals;
             for (auto &global : params.global_targets) {
                 NewGlobals[global.second->getName()] = global.first;
@@ -251,6 +261,7 @@ static jl_callptr_t _jl_compile_codeinst(
             }
         }
 
+#ifndef JL_USE_JITLINK
         // Collect the exported functions from the params.compiled_functions modules,
         // which form dependencies on which functions need to be
         // compiled first. Cycles of functions are compiled together.
@@ -277,18 +288,40 @@ static jl_callptr_t _jl_compile_codeinst(
             jl_add_to_ee(M, NewExports, Queued, Stack);
             assert(Queued.empty() && Stack.empty() && !M);
         }
+#else
+        for (auto &def : params.compiled_functions) {
+            // Add the results to the execution engine now
+            orc::ThreadSafeModule &M = std::get<0>(def.second);
+            if (M)
+                jl_ExecutionEngine->addModule(std::move(M));
+        }
+#endif
         ++CompiledCodeinsts;
         MaxWorkqueueSize.updateMax(params.compiled_functions.size());
         IndirectCodeinsts += params.compiled_functions.size() - 1;
     }
 
+    // batch compile job for all new functions
+    SmallVector<StringRef> NewDefs;
+    for (auto &def : params.compiled_functions) {
+        jl_llvm_functions_t &decls = std::get<1>(def.second);
+        if (decls.functionObject != "jl_fptr_args" &&
+            decls.functionObject != "jl_fptr_sparam" &&
+            decls.functionObject != "jl_f_opaque_closure_call")
+            NewDefs.push_back(decls.functionObject);
+        if (!decls.specFunctionObject.empty())
+            NewDefs.push_back(decls.specFunctionObject);
+    }
+    auto Addrs = jl_ExecutionEngine->findSymbols(NewDefs);
+
     size_t i = 0;
+    size_t nextaddr = 0;
     for (auto &def : params.compiled_functions) {
         jl_code_instance_t *this_code = def.first;
         if (i < jl_timing_print_limit)
             jl_timing_show_func_sig(this_code->def->specTypes, JL_TIMING_DEFAULT_BLOCK);
 
-        jl_llvm_functions_t decls = std::get<1>(def.second);
+        jl_llvm_functions_t &decls = std::get<1>(def.second);
         jl_callptr_t addr;
         bool isspecsig = false;
         if (decls.functionObject == "jl_fptr_args") {
@@ -301,12 +334,16 @@ static jl_callptr_t _jl_compile_codeinst(
             addr = jl_f_opaque_closure_call_addr;
         }
         else {
-            addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
+            assert(NewDefs[nextaddr] == decls.functionObject);
+            addr = (jl_callptr_t)Addrs[nextaddr++];
+            assert(addr);
             isspecsig = true;
         }
         if (!decls.specFunctionObject.empty()) {
             void *prev_specptr = NULL;
-            auto spec = (void*)getAddressForFunction(decls.specFunctionObject);
+            assert(NewDefs[nextaddr] == decls.specFunctionObject);
+            void *spec = (void*)Addrs[nextaddr++];
+            assert(spec);
             if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) {
                 // only set specsig and invoke if we were the first to set specptr
                 jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig);
@@ -564,6 +601,19 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
     return jl_an_empty_string;
 }
 
+#if JL_LLVM_VERSION >= 180000
+CodeGenOptLevel CodeGenOptLevelFor(int optlevel)
+{
+#ifdef DISABLE_OPT
+    return CodeGenOptLevel::None;
+#else
+    return optlevel == 0 ? CodeGenOptLevel::None :
+        optlevel == 1 ? CodeGenOptLevel::Less :
+        optlevel == 2 ? CodeGenOptLevel::Default :
+        CodeGenOptLevel::Aggressive;
+#endif
+}
+#else
 CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
 {
 #ifdef DISABLE_OPT
@@ -575,6 +625,7 @@ CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
         CodeGenOpt::Aggressive;
 #endif
 }
+#endif
 
 static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT
 {
@@ -583,48 +634,6 @@ static auto countBasicBlocks(const Function &F) JL_NOTSAFEPOINT
 
 static constexpr size_t N_optlevels = 4;
 
-static Expected<orc::ThreadSafeModule> validateExternRelocations(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) JL_NOTSAFEPOINT {
-#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK)
-    auto isIntrinsicFunction = [](GlobalObject &GO) JL_NOTSAFEPOINT {
-        auto F = dyn_cast<Function>(&GO);
-        if (!F)
-            return false;
-        return F->isIntrinsic() || F->getName().startswith("julia.");
-    };
-    // validate the relocations for M (only for RuntimeDyld, JITLink performs its own symbol validation)
-    auto Err = TSM.withModuleDo([isIntrinsicFunction](Module &M) JL_NOTSAFEPOINT {
-        Error Err = Error::success();
-        for (auto &GO : make_early_inc_range(M.global_objects())) {
-            if (!GO.isDeclarationForLinker())
-                continue;
-            if (GO.use_empty()) {
-                GO.eraseFromParent();
-                continue;
-            }
-            if (isIntrinsicFunction(GO))
-                continue;
-            auto sym = jl_ExecutionEngine->findUnmangledSymbol(GO.getName());
-            if (sym)
-                continue;
-            // TODO have we ever run into this check? It's been guaranteed to not
-            // fire in an assert build, since previously LLVM would abort due to
-            // not handling the error if we didn't find the unmangled symbol
-            if (SectionMemoryManager::getSymbolAddressInProcess(
-                            jl_ExecutionEngine->getMangledName(GO.getName()))) {
-                consumeError(sym.takeError());
-                continue;
-            }
-            Err = joinErrors(std::move(Err), sym.takeError());
-        }
-        return Err;
-    });
-    if (Err) {
-        return std::move(Err);
-    }
-#endif
-    return std::move(TSM);
-}
-
 static Expected<orc::ThreadSafeModule> selectOptLevel(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
     TSM.withModuleDo([](Module &M) {
         size_t opt_level = std::max(static_cast<int>(jl_options.opt_level), 0);
@@ -655,21 +664,8 @@ static Expected<orc::ThreadSafeModule> selectOptLevel(orc::ThreadSafeModule TSM,
     return std::move(TSM);
 }
 
-static void recordDebugTSM(orc::MaterializationResponsibility &, orc::ThreadSafeModule TSM) JL_NOTSAFEPOINT {
-    auto ptr = TSM.withModuleDo([](Module &M) JL_NOTSAFEPOINT {
-        auto md = M.getModuleFlag("julia.__jit_debug_tsm_addr");
-        if (!md)
-            return static_cast<orc::ThreadSafeModule *>(nullptr);
-        return reinterpret_cast<orc::ThreadSafeModule *>(cast<ConstantInt>(cast<ConstantAsMetadata>(md)->getValue())->getZExtValue());
-    });
-    if (ptr) {
-        *ptr = std::move(TSM);
-    }
-}
-
 void jl_register_jit_object(const object::ObjectFile &debugObj,
-                            std::function<uint64_t(const StringRef &)> getLoadAddress,
-                            std::function<void *(void *)> lookupWriteAddress);
+                            std::function<uint64_t(const StringRef &)> getLoadAddress);
 
 namespace {
 
@@ -684,22 +680,19 @@ struct JITObjectInfo {
 class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
     std::mutex PluginMutex;
     std::map<MaterializationResponsibility *, std::unique_ptr<JITObjectInfo>> PendingObjs;
-    // Resources from distinct `MaterializationResponsibility`s can get merged
-    // after emission, so we can have multiple debug objects per resource key.
-    std::map<ResourceKey, SmallVector<std::unique_ptr<JITObjectInfo>, 0>> RegisteredObjs;
 
 public:
     void notifyMaterializing(MaterializationResponsibility &MR, jitlink::LinkGraph &G,
                              jitlink::JITLinkContext &Ctx,
                              MemoryBufferRef InputObject) override
     {
-        // Keeping around a full copy of the input object file (and re-parsing it) is
-        // wasteful, but for now, this lets us reuse the existing debuginfo.cpp code.
-        // Should look into just directly pulling out all the information required in
-        // a JITLink pass and just keeping the required tables/DWARF sections around
-        // (perhaps using the LLVM DebuggerSupportPlugin as a reference).
         auto NewBuffer =
             MemoryBuffer::getMemBufferCopy(InputObject.getBuffer(), G.getName());
+        // Re-parsing the InputObject is wasteful, but for now, this lets us
+        // reuse the existing debuginfo.cpp code. Should look into just
+        // directly pulling out all the information required in a JITLink pass
+        // and just keeping the required tables/DWARF sections around (perhaps
+        // using the LLVM DebuggerSupportPlugin as a reference).
         auto NewObj =
             cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
 
@@ -732,14 +725,9 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
                 return result->second;
             };
 
-            jl_register_jit_object(*NewInfo->Object, getLoadAddress, nullptr);
-        }
-
-        cantFail(MR.withResourceKeyDo([&](ResourceKey K) {
-            std::lock_guard<std::mutex> lock(PluginMutex);
-            RegisteredObjs[K].push_back(std::move(PendingObjs[&MR]));
+            jl_register_jit_object(*NewInfo->Object, getLoadAddress);
             PendingObjs.erase(&MR);
-        }));
+        }
 
         return Error::success();
     }
@@ -750,32 +738,23 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
         PendingObjs.erase(&MR);
         return Error::success();
     }
+
 #if JL_LLVM_VERSION >= 160000
     Error notifyRemovingResources(JITDylib &JD, orc::ResourceKey K) override
 #else
-    Error notifyRemovingResources(ResourceKey K) override
+    Error notifyRemovingResources(orc::ResourceKey K) override
 #endif
     {
-        std::lock_guard<std::mutex> lock(PluginMutex);
-        RegisteredObjs.erase(K);
-        // TODO: If we ever unload code, need to notify debuginfo registry.
         return Error::success();
     }
 
 #if JL_LLVM_VERSION >= 160000
-    void notifyTransferringResources(JITDylib &JD, ResourceKey DstKey, ResourceKey SrcKey) override
+    void notifyTransferringResources(JITDylib &JD, orc::ResourceKey DstKey,
+                                     orc::ResourceKey SrcKey) override {}
 #else
-    void notifyTransferringResources(ResourceKey DstKey, ResourceKey SrcKey) override
+    void notifyTransferringResources(orc::ResourceKey DstKey,
+                                     orc::ResourceKey SrcKey) override {}
 #endif
-    {
-        std::lock_guard<std::mutex> lock(PluginMutex);
-        auto SrcIt = RegisteredObjs.find(SrcKey);
-        if (SrcIt != RegisteredObjs.end()) {
-            for (std::unique_ptr<JITObjectInfo> &Info : SrcIt->second)
-                RegisteredObjs[DstKey].push_back(std::move(Info));
-            RegisteredObjs.erase(SrcIt);
-        }
-    }
 
     void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &,
                           jitlink::PassConfiguration &PassConfig) override
@@ -815,12 +794,12 @@ class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
 
 class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
 private:
-    std::atomic<size_t> &total_size;
+    std::atomic<size_t> &jit_bytes_size;
 
 public:
 
-    JLMemoryUsagePlugin(std::atomic<size_t> &total_size)
-        : total_size(total_size) {}
+    JLMemoryUsagePlugin(std::atomic<size_t> &jit_bytes_size)
+        : jit_bytes_size(jit_bytes_size) {}
 
     Error notifyFailed(orc::MaterializationResponsibility &MR) override {
         return Error::success();
@@ -869,7 +848,7 @@ class JLMemoryUsagePlugin : public ObjectLinkingLayer::Plugin {
             }
             (void) code_size;
             (void) data_size;
-            this->total_size.fetch_add(graph_size, std::memory_order_relaxed);
+            this->jit_bytes_size.fetch_add(graph_size, std::memory_order_relaxed);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITSize, graph_size);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITCodeSize, code_size);
             jl_timing_counter_inc(JL_TIMING_COUNTER_JITDataSize, data_size);
@@ -977,32 +956,11 @@ class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {
 };
 
 
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-void *lookupWriteAddressFor(RTDyldMemoryManager *MemMgr, void *rt_addr);
-#endif
-
 void registerRTDyldJITObject(const object::ObjectFile &Object,
                              const RuntimeDyld::LoadedObjectInfo &L,
                              const std::shared_ptr<RTDyldMemoryManager> &MemMgr)
 {
-    auto SavedObject = L.getObjectForDebug(Object).takeBinary();
-    // If the debug object is unavailable, save (a copy of) the original object
-    // for our backtraces.
-    // This copy seems unfortunate, but there doesn't seem to be a way to take
-    // ownership of the original buffer.
-    if (!SavedObject.first) {
-        auto NewBuffer =
-            MemoryBuffer::getMemBufferCopy(Object.getData(), Object.getFileName());
-        auto NewObj =
-            cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
-        SavedObject = std::make_pair(std::move(NewObj), std::move(NewBuffer));
-    }
-    const object::ObjectFile *DebugObj = SavedObject.first.release();
-    SavedObject.second.release();
-
     StringMap<object::SectionRef> loadedSections;
-    // Use the original Object, not the DebugObject, as this is used for the
-    // RuntimeDyld::LoadedObjectInfo lookup.
     for (const object::SectionRef &lSection : Object.sections()) {
         auto sName = lSection.getName();
         if (sName) {
@@ -1019,13 +977,8 @@ void registerRTDyldJITObject(const object::ObjectFile &Object,
         return L.getSectionLoadAddress(search->second);
     };
 
-    jl_register_jit_object(*DebugObj, getLoadAddress,
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-        [MemMgr](void *p) { return lookupWriteAddressFor(MemMgr.get(), p); }
-#else
-        nullptr
-#endif
-    );
+    auto DebugObject = L.getObjectForDebug(Object); // ELF requires us to make a copy to mutate the header with the section load addresses. On other platforms this is a no-op.
+    jl_register_jit_object(DebugObject.getBinary() ? *DebugObject.getBinary() : Object, getLoadAddress);
 }
 namespace {
     static std::unique_ptr<TargetMachine> createTargetMachine() JL_NOTSAFEPOINT {
@@ -1189,7 +1142,7 @@ namespace {
                 {
                     if (*jl_ExecutionEngine->get_dump_llvm_opt_stream()) {
                         for (auto &F : M.functions()) {
-                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                            if (F.isDeclaration() || F.getName().starts_with("jfptr_")) {
                                 continue;
                             }
                             // Each function is printed as a YAML object with several attributes
@@ -1242,7 +1195,7 @@ namespace {
                         // Print LLVM function statistics _after_ optimization
                         ios_printf(stream, "  after: \n");
                         for (auto &F : M.functions()) {
-                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                            if (F.isDeclaration() || F.getName().starts_with("jfptr_")) {
                                 continue;
                             }
                             Stat(F).dump(stream);
@@ -1316,8 +1269,9 @@ namespace {
                 }
 
                 // Windows needs some inline asm to help
-                // build unwind tables
-                jl_decorate_module(M);
+                // build unwind tables, if they have any functions to decorate
+                if (!M.functions().empty())
+                    jl_decorate_module(M);
             });
             return std::move(TSM);
         }
@@ -1429,7 +1383,7 @@ struct JuliaOJIT::DLSymOptimizer {
     void operator()(Module &M) {
         for (auto &GV : M.globals()) {
             auto Name = GV.getName();
-            if (Name.startswith("jlplt") && Name.endswith("got")) {
+            if (Name.starts_with("jlplt") && Name.ends_with("got")) {
                 auto fname = GV.getAttribute("julia.fname").getValueAsString().str();
                 void *addr;
                 if (GV.hasAttribute("julia.libname")) {
@@ -1598,6 +1552,7 @@ JuliaOJIT::JuliaOJIT()
 #ifdef JL_USE_JITLINK
     MemMgr(createJITLinkMemoryManager()),
     ObjectLayer(ES, *MemMgr),
+    CompileLayer(ES, ObjectLayer, std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)),
 #else
     MemMgr(createRTDyldMemoryManager()),
     ObjectLayer(
@@ -1607,15 +1562,12 @@ JuliaOJIT::JuliaOJIT()
                 return result;
             }
         ),
-#endif
     LockLayer(ObjectLayer),
     CompileLayer(ES, LockLayer, std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM)),
+#endif
     JITPointersLayer(ES, CompileLayer, orc::IRTransformLayer::TransformFunction(JITPointersT(SharedBytes, RLST_mutex))),
     OptimizeLayer(ES, JITPointersLayer, orc::IRTransformLayer::TransformFunction(OptimizerT<N_optlevels>(*TM, PrintLLVMTimers, llvm_printing_mutex))),
-    OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel)),
-    DepsVerifyLayer(ES, OptSelLayer, orc::IRTransformLayer::TransformFunction(validateExternRelocations)),
-    ExternalCompileLayer(ES, LockLayer,
-        std::make_unique<CompilerT<N_optlevels>>(orc::irManglingOptionsFromTargetOptions(TM->Options), *TM))
+    OptSelLayer(ES, OptimizeLayer, orc::IRTransformLayer::TransformFunction(selectOptLevel))
 {
     JL_MUTEX_INIT(&this->jitlock, "JuliaOJIT");
 #ifdef JL_USE_JITLINK
@@ -1630,7 +1582,7 @@ JuliaOJIT::JuliaOJIT()
         ES, std::move(ehRegistrar)));
 
     ObjectLayer.addPlugin(std::make_unique<JLDebuginfoPlugin>());
-    ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(total_size));
+    ObjectLayer.addPlugin(std::make_unique<JLMemoryUsagePlugin>(jit_bytes_size));
 #else
     ObjectLayer.setNotifyLoaded(
         [this](orc::MaterializationResponsibility &MR,
@@ -1639,7 +1591,6 @@ JuliaOJIT::JuliaOJIT()
             registerRTDyldJITObject(Object, LO, MemMgr);
         });
 #endif
-    CompileLayer.setNotifyCompiled(recordDebugTSM);
 
     std::string ErrorStr;
 
@@ -1683,7 +1634,7 @@ JuliaOJIT::JuliaOJIT()
                   DL.getGlobalPrefix(),
                   [&](const orc::SymbolStringPtr &S) {
                         const char *const atomic_prefix = "__atomic_";
-                        return (*S).startswith(atomic_prefix);
+                        return (*S).starts_with(atomic_prefix);
                   })));
         }
     }
@@ -1800,8 +1751,8 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
 {
     JL_TIMING(LLVM_JIT, JIT_Total);
     ++ModulesAdded;
+#ifndef JL_USE_JITLINK
     orc::SymbolLookupSet NewExports;
-    orc::ThreadSafeModule CurrentlyCompiling;
     TSM.withModuleDo([&](Module &M) JL_NOTSAFEPOINT {
         for (auto &F : M.global_values()) {
             if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
@@ -1810,42 +1761,24 @@ void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
             }
         }
         assert(!verifyLLVMIR(M));
-        auto jit_debug_tsm_addr = ConstantInt::get(Type::getIntNTy(M.getContext(), sizeof(void*) * CHAR_BIT), (uintptr_t) &CurrentlyCompiling);
-        M.addModuleFlag(Module::Error, "julia.__jit_debug_tsm_addr", jit_debug_tsm_addr);
     });
+#endif
 
-    // TODO: what is the performance characteristics of this?
-    auto Err = DepsVerifyLayer.add(JD, std::move(TSM));
+    auto Err = OptSelLayer.add(JD, std::move(TSM));
     if (Err) {
         ES.reportError(std::move(Err));
         errs() << "Failed to add module to JIT!\n";
-        if (CurrentlyCompiling) {
-            CurrentlyCompiling.withModuleDo([](Module &M) JL_NOTSAFEPOINT { errs() << "Dumping failing module\n" << M << "\n"; });
-        } else {
-            errs() << "Module unavailable to be printed\n";
-        }
         abort();
     }
+#ifndef JL_USE_JITLINK
     // force eager compilation (for now), due to memory management specifics
     // (can't handle compilation recursion)
     auto Lookups = ES.lookup({{&JD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly}}, NewExports);
     if (!Lookups) {
         ES.reportError(Lookups.takeError());
         errs() << "Failed to lookup symbols in module!\n";
-        if (CurrentlyCompiling) {
-            CurrentlyCompiling.withModuleDo([](Module &M) JL_NOTSAFEPOINT { errs() << "Dumping failing module\n" << M << "\n"; });
-        } else {
-            errs() << "Module unavailable to be printed\n";
-        }
-    }
-    for (auto &Sym : *Lookups) {
-        #if JL_LLVM_VERSION >= 170000
-        assert(Sym.second.getAddress());
-        #else
-        assert(Sym.second);
-        #endif
-        (void) Sym;
     }
+#endif
 }
 
 Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM, bool ShouldOptimize)
@@ -1864,12 +1797,33 @@ Error JuliaOJIT::addExternalModule(orc::JITDylib &JD, orc::ThreadSafeModule TSM,
             return Error::success();
             }))
         return Err;
-    return ExternalCompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM));
+    return CompileLayer.add(JD.getDefaultResourceTracker(), std::move(TSM));
 }
 
 Error JuliaOJIT::addObjectFile(orc::JITDylib &JD, std::unique_ptr<MemoryBuffer> Obj) {
     assert(Obj && "Can not add null object");
+#ifdef JL_USE_JITLINK
+    return ObjectLayer.add(JD.getDefaultResourceTracker(), std::move(Obj));
+#else
     return LockLayer.add(JD.getDefaultResourceTracker(), std::move(Obj));
+#endif
+}
+
+SmallVector<uint64_t> JuliaOJIT::findSymbols(ArrayRef<StringRef> Names)
+{
+    DenseMap<orc::NonOwningSymbolStringPtr, size_t> Unmangled;
+    orc::SymbolLookupSet Exports;
+    for (StringRef Name : Names) {
+        auto Mangled = ES.intern(getMangledName(Name));
+        Unmangled[NonOwningSymbolStringPtr(Mangled)] = Unmangled.size();
+        Exports.add(std::move(Mangled));
+    }
+    SymbolMap Syms = cantFail(ES.lookup(orc::makeJITDylibSearchOrder(ArrayRef(&JD)), std::move(Exports)));
+    SmallVector<uint64_t> Addrs(Names.size());
+    for (auto it : Syms) {
+        Addrs[Unmangled.at(orc::NonOwningSymbolStringPtr(it.first))] = it.second.getAddress().getValue();
+    }
+    return Addrs;
 }
 
 #if JL_LLVM_VERSION >= 170000
@@ -1901,7 +1855,7 @@ uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name)
         consumeError(addr.takeError());
         return 0;
     }
-    return cantFail(std::move(addr)).getAddress().getValue();
+    return addr->getAddress().getValue();
 }
 
 uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
@@ -1911,7 +1865,7 @@ uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
         consumeError(addr.takeError());
         return 0;
     }
-    return cantFail(std::move(addr)).getAddress().getValue();
+    return addr->getAddress().getValue();
 }
 #else
 JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly)
@@ -1987,41 +1941,92 @@ StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_callptr_t invoke, jl
     return *fname;
 }
 
-
 #ifdef JL_USE_JITLINK
-extern "C" orc::shared::CWrapperFunctionResult
-llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size);
+#if JL_LLVM_VERSION >= 170000
+#define addAbsoluteToMap(map,name) \
+    (map[mangle(#name)] = {ExecutorAddr::fromPtr(&name), JITSymbolFlags::Exported | JITSymbolFlags::Callable}, orc::ExecutorAddr::fromPtr(&name))
+#else
+#define addAbsoluteToMap(map,name) \
+    (map[mangle(#name)] = JITEvaluatedSymbol::fromPointer(&name, JITSymbolFlags::Exported | JITSymbolFlags::Callable), orc::ExecutorAddr::fromPtr(&name))
+#endif
 
 void JuliaOJIT::enableJITDebuggingSupport()
 {
     orc::SymbolMap GDBFunctions;
-    #if JL_LLVM_VERSION >= 170000
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = {ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBAllocAction), JITSymbolFlags::Exported | JITSymbolFlags::Callable};
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = {ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper), JITSymbolFlags::Exported | JITSymbolFlags::Callable};
-    #else
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBAllocAction")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBAllocAction, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
-    GDBFunctions[mangle("llvm_orc_registerJITLoaderGDBWrapper")] = JITEvaluatedSymbol::fromPointer(&llvm_orc_registerJITLoaderGDBWrapper, JITSymbolFlags::Exported | JITSymbolFlags::Callable);
-    #endif
+    addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBAllocAction);
+    auto registerJITLoaderGDBWrapper = addAbsoluteToMap(GDBFunctions,llvm_orc_registerJITLoaderGDBWrapper);
     cantFail(JD.define(orc::absoluteSymbols(GDBFunctions)));
     if (TM->getTargetTriple().isOSBinFormatMachO())
         ObjectLayer.addPlugin(cantFail(orc::GDBJITDebugInfoRegistrationPlugin::Create(ES, JD, TM->getTargetTriple())));
 #ifndef _COMPILER_ASAN_ENABLED_ // TODO: Fix duplicated sections spam #51794
     else if (TM->getTargetTriple().isOSBinFormatELF())
         //EPCDebugObjectRegistrar doesn't take a JITDylib, so we have to directly provide the call address
-        ObjectLayer.addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(ES, std::make_unique<orc::EPCDebugObjectRegistrar>(ES, orc::ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBWrapper))));
+        ObjectLayer.addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(ES, std::make_unique<orc::EPCDebugObjectRegistrar>(ES, registerJITLoaderGDBWrapper)));
+#endif
+}
+
+void JuliaOJIT::enableIntelJITEventListener()
+{
+#if JL_LLVM_VERSION >= 190000
+    if (TT.isOSBinFormatELF()) {
+        orc::SymbolMap VTuneFunctions;
+        auto RegisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_registerVTuneImpl);
+        auto UnregisterImplAddr = addAbsoluteToMap(VTuneFunctions,llvm_orc_unregisterVTuneImpl);
+        ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create()));
+        //ObjectLayer.addPlugin(cantFail(VTuneSupportPlugin::Create(ES.getExecutorProcessControl(),
+        //                           JD, /*EmitDebugInfo=*/true,
+        //                           /*TestMode=*/false)));
+        bool EmitDebugInfo = true;
+        ObjectLayer.addPlugin(std::make_unique<VTuneSupportPlugin>(
+            ES.getExecutorProcessControl(), RegisterImplAddr, UnregisterImplAddr, EmitDebugInfo));
+    }
+#endif
+}
+
+void JuliaOJIT::enableOProfileJITEventListener()
+{
+    // implement when available in LLVM
+}
+
+void JuliaOJIT::enablePerfJITEventListener()
+{
+#if JL_LLVM_VERSION >= 180000
+    orc::SymbolMap PerfFunctions;
+    auto StartAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfStart);
+    auto EndAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfEnd);
+    auto ImplAddr = addAbsoluteToMap(PerfFunctions,llvm_orc_registerJITLoaderPerfImpl);
+    cantFail(JD.define(orc::absoluteSymbols(PerfFunctions)));
+    if (TM->getTargetTriple().isOSBinFormatELF()) {
+        ObjectLayer.addPlugin(cantFail(DebugInfoPreservationPlugin::Create()));
+        //ObjectLayer.addPlugin(cantFail(PerfSupportPlugin::Create(
+        //    ES.getExecutorProcessControl(), *JD, true, true)));
+        bool EmitDebugInfo = true, EmitUnwindInfo = true;
+        ObjectLayer.addPlugin(std::make_unique<PerfSupportPlugin>(
+            ES.getExecutorProcessControl(), StartAddr, EndAddr, ImplAddr, EmitDebugInfo, EmitUnwindInfo));
+    }
 #endif
 }
 #else
+void JuliaOJIT::RegisterJITEventListener(JITEventListener *L)
+{
+    if (L)
+        ObjectLayer.registerJITEventListener(*L);
+}
 void JuliaOJIT::enableJITDebuggingSupport()
 {
     RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
 }
-
-void JuliaOJIT::RegisterJITEventListener(JITEventListener *L)
+void JuliaOJIT::enableIntelJITEventListener()
 {
-    if (!L)
-        return;
-    this->ObjectLayer.registerJITEventListener(*L);
+    RegisterJITEventListener(JITEventListener::createIntelJITEventListener());
+}
+void JuliaOJIT::enableOProfileJITEventListener()
+{
+    RegisterJITEventListener(JITEventListener::createOProfileJITEventListener());
+}
+void JuliaOJIT::enablePerfJITEventListener()
+{
+    RegisterJITEventListener(JITEventListener::createPerfJITEventListener());
 }
 #endif
 
@@ -2042,19 +2047,20 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV)
     return getMangledName(GV->getName());
 }
 
-#ifdef JL_USE_JITLINK
 size_t JuliaOJIT::getTotalBytes() const
 {
-    return total_size.load(std::memory_order_relaxed);
+    auto bytes = jit_bytes_size.load(std::memory_order_relaxed);
+#ifndef JL_USE_JITLINK
+    size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT;
+    bytes += getRTDyldMemoryManagerTotalBytes(MemMgr.get());
+#endif
+    return bytes;
 }
-#else
-size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm) JL_NOTSAFEPOINT;
 
-size_t JuliaOJIT::getTotalBytes() const
+void JuliaOJIT::addBytes(size_t bytes)
 {
-    return getRTDyldMemoryManagerTotalBytes(MemMgr.get());
+    jit_bytes_size.fetch_add(bytes, std::memory_order_relaxed);
 }
-#endif
 
 void JuliaOJIT::printTimers()
 {
@@ -2238,23 +2244,67 @@ static void jl_decorate_module(Module &M) {
     if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
         // Add special values used by debuginfo to build the UnwindData table registration for Win64
         // This used to be GV, but with https://reviews.llvm.org/D100944 we no longer can emit GV into `.text`
-        // TODO: The data is set in debuginfo.cpp but it should be okay to actually emit it here.
-        M.appendModuleInlineAsm("\
-    .section .text                  \n\
-    .type   __UnwindData,@object    \n\
-    .p2align        2, 0x90         \n\
-    __UnwindData:                   \n\
-        .zero   12                  \n\
-        .size   __UnwindData, 12    \n\
-                                    \n\
-        .type   __catchjmp,@object  \n\
-        .p2align        2, 0x90     \n\
-    __catchjmp:                     \n\
-        .zero   12                  \n\
-        .size   __catchjmp, 12");
+        // and with JITLink it became difficult to change the content afterwards, but we
+        // would prefer that this simple content wasn't recompiled in every single module,
+        // so we emit the necessary PLT trampoline as inline assembly.
+        // This is somewhat duplicated with the .pdata section, but we haven't been able to
+        // use that yet due to relocation issues.
+#define ASM_USES_ELF // use ELF or COFF syntax based on FORCE_ELF
+        StringRef inline_asm(
+    ".section"
+#if JL_LLVM_VERSION >= 180000
+        " .ltext,\"ax\",@progbits\n"
+#else
+        " .text\n"
+#endif
+    ".globl __julia_personality\n"
+    "\n"
+#ifdef ASM_USES_ELF
+    ".type __UnwindData,@object\n"
+#else
+    ".def __UnwindData\n"
+    ".scl 2\n"
+    ".type 0\n"
+    ".endef\n"
+#endif
+    ".p2align        2, 0x90\n"
+    "__UnwindData:\n"
+    "  .byte 0x09;\n" // version info, UNW_FLAG_EHANDLER
+    "  .byte 4;\n"    // size of prolog (bytes)
+    "  .byte 2;\n"    // count of unwind codes (slots)
+    "  .byte 0x05;\n" // frame register (rbp) = rsp
+    "  .byte 4;\n"    // second instruction
+    "  .byte 0x03;\n" // mov RBP, RSP
+    "  .byte 1;\n"    // first instruction
+    "  .byte 0x50;\n" // push RBP
+    "  .int __catchjmp - "
+#if JL_LLVM_VERSION >= 180000
+    ".ltext;\n" // Section-relative offset (if using COFF and JITLink, this can be relative to __ImageBase instead, though then we could possibly use pdata/xdata directly then)
+#else
+    ".text;\n"
+#endif
+    ".size __UnwindData, 12\n"
+    "\n"
+#ifdef ASM_USES_ELF
+    ".type __catchjmp,@function\n"
+#else
+    ".def __catchjmp\n"
+    ".scl 2\n"
+    ".type 32\n"
+    ".endef\n"
+#endif
+    ".p2align        2, 0x90\n"
+    "__catchjmp:\n"
+    "  movabsq $__julia_personality, %rax\n"
+    "  jmpq *%rax\n"
+    ".size __catchjmp, . - __catchjmp\n"
+    "\n");
+        M.appendModuleInlineAsm(inline_asm);
     }
+#undef ASM_USES_ELF
 }
 
+#ifndef JL_USE_JITLINK
 // Implements Tarjan's SCC (strongly connected components) algorithm, simplified to remove the count variable
 static int jl_add_to_ee(
         orc::ThreadSafeModule &M,
@@ -2320,13 +2370,7 @@ static int jl_add_to_ee(
     jl_ExecutionEngine->addModule(std::move(M));
     return 0;
 }
-
-static uint64_t getAddressForFunction(StringRef fname)
-{
-    auto addr = jl_ExecutionEngine->getFunctionAddress(fname);
-    assert(addr);
-    return addr;
-}
+#endif
 
 // helper function for adding a DLLImport (dlsym) address to the execution engine
 void add_named_global(StringRef name, void *addr)
@@ -2339,3 +2383,9 @@ size_t jl_jit_total_bytes_impl(void)
 {
     return jl_ExecutionEngine->getTotalBytes();
 }
+
+// API for adding bytes to record being owned by the JIT
+void jl_jit_add_bytes(size_t bytes)
+{
+    jl_ExecutionEngine->addBytes(bytes);
+}
diff --git a/src/jitlayers.h b/src/jitlayers.h
index aed88f05a1cfb..3353a4093bd27 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -26,9 +26,10 @@
 #include "julia_internal.h"
 #include "platform.h"
 #include "llvm-codegen-shared.h"
+#include "llvm-version.h"
 #include <stack>
 #include <queue>
-
+#include <tuple>
 
 // As of LLVM 13, there are two runtime JIT linker implementations, the older
 // RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink
@@ -46,7 +47,7 @@
 // and feature support (e.g. Windows, JITEventListeners for various profilers,
 // etc.). Thus, we currently only use JITLink where absolutely required, that is,
 // for Mac/aarch64 and Linux/aarch64.
-// #define JL_FORCE_JITLINK
+//#define JL_FORCE_JITLINK
 
 #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)
 # define HAS_SANITIZER
@@ -64,6 +65,7 @@
 using namespace llvm;
 
 extern "C" jl_cgparams_t jl_default_cgparams;
+extern arraylist_t new_invokes;
 
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeContext, LLVMOrcThreadSafeContextRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ThreadSafeModule, LLVMOrcThreadSafeModuleRef)
@@ -90,6 +92,7 @@ struct OptimizationOptions {
     bool enable_vector_pipeline;
     bool remove_ni;
     bool cleanup;
+    bool warn_missed_transformations;
 
     static constexpr OptimizationOptions defaults(
         bool lower_intrinsics=true,
@@ -103,12 +106,13 @@ struct OptimizationOptions {
         bool enable_loop_optimizations=true,
         bool enable_vector_pipeline=true,
         bool remove_ni=true,
-        bool cleanup=true) {
+        bool cleanup=true,
+        bool warn_missed_transformations=false) {
         return {lower_intrinsics, dump_native, external_use, llvm_only,
                 always_inline, enable_early_simplifications,
                 enable_early_optimizations, enable_scalar_optimizations,
                 enable_loop_optimizations, enable_vector_pipeline,
-                remove_ni, cleanup};
+                remove_ni, cleanup, warn_missed_transformations};
     }
 };
 
@@ -208,7 +212,7 @@ struct jl_codegen_call_target_t {
 typedef SmallVector<std::pair<jl_code_instance_t*, jl_codegen_call_target_t>, 0> jl_workqueue_t;
 // TODO DenseMap?
 typedef std::map<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_compiled_functions_t;
-
+typedef std::list<std::tuple<std::string, std::string, unsigned int>> CallFrames;
 struct jl_codegen_params_t {
     orc::ThreadSafeContext tsctx;
     orc::ThreadSafeContext::Lock tsctx_lock;
@@ -227,6 +231,7 @@ struct jl_codegen_params_t {
     std::map<jl_datatype_t*, DIType*> ditypes;
     std::map<jl_datatype_t*, Type*> llvmtypes;
     DenseMap<Constant*, GlobalVariable*> mergedConstants;
+    llvm::MapVector<jl_method_instance_t*, std::tuple<jl_method_instance_t*, CallFrames>> enqueuers;
     // Map from symbol name (in a certain library) to its GV in sysimg and the
     // DL handle address in the current session.
     StringMap<std::pair<GlobalVariable*,SymMapGV>> libMapGV;
@@ -320,12 +325,7 @@ class MaxAlignedAllocImpl
     LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, Align Alignment) {
         Align MaxAlign = alignment(Size);
         assert(Alignment < MaxAlign); (void)Alignment;
-        void* result = jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset);
-#ifdef MMTK_GC
-        jl_ptls_t ptls = jl_current_task->ptls;
-        mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(result), Size);
-#endif
-        return result;
+        return jl_gc_perm_alloc(Size, 0, MaxAlign.value(), offset);
     }
 
     inline LLVM_ATTRIBUTE_RETURNS_NONNULL
@@ -363,7 +363,6 @@ class JuliaOJIT {
     typedef orc::ObjectLinkingLayer ObjLayerT;
 #else
     typedef orc::RTDyldObjectLinkingLayer ObjLayerT;
-#endif
     struct LockLayerT : public orc::ObjectLayer {
 
         LockLayerT(orc::ObjectLayer &BaseLayer) JL_NOTSAFEPOINT : orc::ObjectLayer(BaseLayer.getExecutionSession()), BaseLayer(BaseLayer) {}
@@ -381,11 +380,11 @@ class JuliaOJIT {
         orc::ObjectLayer &BaseLayer;
         std::mutex EmissionMutex;
     };
+#endif
     typedef orc::IRCompileLayer CompileLayerT;
     typedef orc::IRTransformLayer JITPointersLayerT;
     typedef orc::IRTransformLayer OptimizeLayerT;
     typedef orc::IRTransformLayer OptSelLayerT;
-    typedef orc::IRTransformLayer DepsVerifyLayerT;
     typedef object::OwningBinary<object::ObjectFile> OwningObj;
     template
     <typename ResourceT, size_t max = 0,
@@ -500,10 +499,9 @@ class JuliaOJIT {
 
     struct DLSymOptimizer;
 
-private:
-    // Custom object emission notification handler for the JuliaOJIT
-    template <typename ObjT, typename LoadResult>
-    void registerObject(const ObjT &Obj, const LoadResult &LO);
+#ifndef JL_USE_JITLINK
+    void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT;
+#endif
 
 public:
 
@@ -511,10 +509,9 @@ class JuliaOJIT {
     ~JuliaOJIT() JL_NOTSAFEPOINT;
 
     void enableJITDebuggingSupport() JL_NOTSAFEPOINT;
-#ifndef JL_USE_JITLINK
-    // JITLink doesn't support old JITEventListeners (yet).
-    void RegisterJITEventListener(JITEventListener *L) JL_NOTSAFEPOINT;
-#endif
+    void enableIntelJITEventListener() JL_NOTSAFEPOINT;
+    void enableOProfileJITEventListener() JL_NOTSAFEPOINT;
+    void enablePerfJITEventListener() JL_NOTSAFEPOINT;
 
     orc::SymbolStringPtr mangle(StringRef Name) JL_NOTSAFEPOINT;
     void addGlobalMapping(StringRef Name, uint64_t Addr) JL_NOTSAFEPOINT;
@@ -525,7 +522,7 @@ class JuliaOJIT {
                             bool ShouldOptimize = false) JL_NOTSAFEPOINT;
     Error addObjectFile(orc::JITDylib &JD,
                         std::unique_ptr<MemoryBuffer> Obj) JL_NOTSAFEPOINT;
-    orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return ExternalCompileLayer; };
+    orc::IRCompileLayer &getIRCompileLayer() JL_NOTSAFEPOINT { return CompileLayer; };
     orc::ExecutionSession &getExecutionSession() JL_NOTSAFEPOINT { return ES; }
     orc::JITDylib &getExternalJITDylib() JL_NOTSAFEPOINT { return ExternalJD; }
 
@@ -533,6 +530,7 @@ class JuliaOJIT {
     Expected<llvm::orc::ExecutorSymbolDef> findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT;
     Expected<llvm::orc::ExecutorSymbolDef> findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT;
     Expected<llvm::orc::ExecutorSymbolDef> findExternalJDSymbol(StringRef Name, bool ExternalJDOnly) JL_NOTSAFEPOINT;
+    SmallVector<uint64_t> findSymbols(ArrayRef<StringRef> Names) JL_NOTSAFEPOINT;
     #else
     JITEvaluatedSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) JL_NOTSAFEPOINT;
     JITEvaluatedSymbol findUnmangledSymbol(StringRef Name) JL_NOTSAFEPOINT;
@@ -562,6 +560,7 @@ class JuliaOJIT {
     TargetIRAnalysis getTargetIRAnalysis() const JL_NOTSAFEPOINT;
 
     size_t getTotalBytes() const JL_NOTSAFEPOINT;
+    void addBytes(size_t bytes) JL_NOTSAFEPOINT;
     void printTimers() JL_NOTSAFEPOINT;
 
     jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT {
@@ -608,20 +607,20 @@ class JuliaOJIT {
 
     ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;
 
+    std::atomic<size_t> jit_bytes_size{0};
 #ifndef JL_USE_JITLINK
     const std::shared_ptr<RTDyldMemoryManager> MemMgr;
 #else
-    std::atomic<size_t> total_size{0};
     const std::unique_ptr<jitlink::JITLinkMemoryManager> MemMgr;
 #endif
     ObjLayerT ObjectLayer;
+#ifndef JL_USE_JITLINK
     LockLayerT LockLayer;
+#endif
     CompileLayerT CompileLayer;
     JITPointersLayerT JITPointersLayer;
     OptimizeLayerT OptimizeLayer;
     OptSelLayerT OptSelLayer;
-    DepsVerifyLayerT DepsVerifyLayer;
-    CompileLayerT ExternalCompileLayer;
 };
 extern JuliaOJIT *jl_ExecutionEngine;
 std::unique_ptr<Module> jl_create_llvm_module(StringRef name, LLVMContext &ctx, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple()) JL_NOTSAFEPOINT;
@@ -647,4 +646,8 @@ void optimizeDLSyms(Module &M);
 // NewPM
 #include "passes.h"
 
+#if JL_LLVM_VERSION >= 180000
+CodeGenOptLevel CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT;
+#else
 CodeGenOpt::Level CodeGenOptLevelFor(int optlevel) JL_NOTSAFEPOINT;
+#endif
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
index ff79966b2b01b..8711c14514145 100644
--- a/src/jl_exported_data.inc
+++ b/src/jl_exported_data.inc
@@ -51,6 +51,7 @@
     XX(jl_floatingpoint_type) \
     XX(jl_function_type) \
     XX(jl_binding_type) \
+    XX(jl_binding_partition_type) \
     XX(jl_globalref_type) \
     XX(jl_gotoifnot_type) \
     XX(jl_enternode_type) \
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
index 4cfd2da2defce..7f4c91e18714a 100644
--- a/src/jl_exported_funcs.inc
+++ b/src/jl_exported_funcs.inc
@@ -97,7 +97,6 @@
     XX(jl_cstr_to_string) \
     XX(jl_current_exception) \
     XX(jl_debug_method_invalidation) \
-    XX(jl_declare_constant) \
     XX(jl_defines_or_exports_p) \
     XX(jl_deprecate_binding) \
     XX(jl_dlclose) \
@@ -168,7 +167,7 @@
     XX(jl_gc_new_weakref) \
     XX(jl_gc_new_weakref_th) \
     XX(jl_gc_num) \
-    XX(jl_gc_pool_alloc) \
+    XX(jl_gc_small_alloc) \
     XX(jl_gc_queue_multiroot) \
     XX(jl_gc_queue_root) \
     XX(jl_gc_wb1_noinline) \
@@ -189,7 +188,7 @@
     XX(jl_gc_total_hrtime) \
     XX(jl_gdblookup) \
     XX(jl_generating_output) \
-    XX(jl_generic_function_def) \
+    XX(jl_declare_const_gf) \
     XX(jl_gensym) \
     XX(jl_getaffinity) \
     XX(jl_getallocationgranularity) \
@@ -321,6 +320,7 @@
     XX(jl_module_name) \
     XX(jl_module_names) \
     XX(jl_module_parent) \
+    XX(jl_module_getloc) \
     XX(jl_module_public) \
     XX(jl_module_public_p) \
     XX(jl_module_use) \
@@ -349,6 +349,8 @@
     XX(jl_new_typevar) \
     XX(jl_next_from_addrinfo) \
     XX(jl_normalize_to_compilable_sig) \
+    XX(jl_method_match_to_mi) \
+    XX(jl_get_unspecialized) \
     XX(jl_no_exc_handler) \
     XX(jl_object_id) \
     XX(jl_object_id_) \
@@ -425,7 +427,6 @@
     XX(jl_set_zero_subnormals) \
     XX(jl_sigatomic_begin) \
     XX(jl_sigatomic_end) \
-    XX(jl_sig_throw) \
     XX(jl_spawn) \
     XX(jl_specializations_get_linfo) \
     XX(jl_specializations_lookup) \
@@ -458,6 +459,8 @@
     XX(jl_test_cpu_feature) \
     XX(jl_threadid) \
     XX(jl_threadpoolid) \
+    XX(jl_get_ptls_rng) \
+    XX(jl_set_ptls_rng) \
     XX(jl_throw) \
     XX(jl_throw_out_of_memory_error) \
     XX(jl_too_few_args) \
@@ -526,6 +529,7 @@
     YY(jl_dump_native) \
     YY(jl_get_llvm_gvs) \
     YY(jl_get_llvm_external_fns) \
+    YY(jl_get_llvm_mis) \
     YY(jl_dump_function_asm) \
     YY(jl_LLVMCreateDisasm) \
     YY(jl_LLVMDisasmInstruction) \
@@ -546,7 +550,6 @@
     YY(jl_type_to_llvm) \
     YY(jl_getUnwindInfo) \
     YY(jl_get_libllvm) \
-    YY(jl_build_newpm_pipeline) \
     YY(jl_register_passbuilder_callbacks) \
     YY(LLVMExtraMPMAddCPUFeaturesPass) \
     YY(LLVMExtraMPMAddRemoveNIPass) \
diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm
index 2c5f42eda5ce8..463e39c41d00a 100644
--- a/src/jlfrontend.scm
+++ b/src/jlfrontend.scm
@@ -211,11 +211,11 @@
           (block
            ,@loc
            (call (core eval) ,name ,x)))
-       (= (call include ,x)
+       (= (call include (:: ,x (top AbstractString)))
           (block
            ,@loc
            (call (core _call_latest) (top include) ,name ,x)))
-       (= (call include (:: ,mex (top Function)) ,x)
+       (= (call include (:: ,mex (top Function)) (:: ,x (top AbstractString)))
           (block
            ,@loc
            (call (core _call_latest) (top include) ,mex ,name ,x)))))
diff --git a/src/jloptions.c b/src/jloptions.c
index 4cdec2c7b367f..35f0a76e3f6e7 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -77,6 +77,7 @@ JL_DLLEXPORT void jl_init_options(void)
                         1,    // can_inline
                         JL_OPTIONS_POLLY_ON, // polly
                         NULL, // trace_compile
+                        NULL, // trace_dispatch
                         JL_OPTIONS_FAST_MATH_DEFAULT,
                         0,    // worker
                         NULL, // cookie
@@ -101,6 +102,7 @@ JL_DLLEXPORT void jl_init_options(void)
                         0, // permalloc_pkgimg
                         0, // heap-size-hint
                         0, // trace_compile_timing
+                        0, // trim
     };
     jl_options_initialized = 1;
 }
@@ -251,18 +253,27 @@ static const char opts_hidden[]  =
     " --strip-ir                                    Remove IR (intermediate representation) of compiled\n"
     "                                               functions\n\n"
 
-    // compiler debugging (see the devdocs for tips on using these options)
+    // compiler debugging and experimental (see the devdocs for tips on using these options)
     " --output-unopt-bc <name>                      Generate unoptimized LLVM bitcode (.bc)\n"
     " --output-bc <name>                            Generate LLVM bitcode (.bc)\n"
     " --output-asm <name>                           Generate an assembly file (.s)\n"
     " --output-incremental={yes|no*}                Generate an incremental output file (rather than\n"
     "                                               complete)\n"
     " --trace-compile={stderr|name}                 Print precompile statements for methods compiled\n"
-    "                                               during execution or save to a path\n"
+    "                                               during execution or save to stderr or a path. Methods that\n"
+    "                                               were recompiled are printed in yellow or with a trailing\n"
+    "                                               comment if color is not supported\n"
     " --trace-compile-timing                        If --trace-compile is enabled show how long each took to\n"
     "                                               compile in ms\n"
     " --image-codegen                               Force generate code in imaging mode\n"
     " --permalloc-pkgimg={yes|no*}                  Copy the data section of package images into memory\n"
+    " --trim={no*|safe|unsafe|unsafe-warn}\n"
+    "                                               Build a sysimage including only code provably reachable\n"
+    "                                               from methods marked by calling `entrypoint`. In unsafe\n"
+    "                                               mode, the resulting binary might be missing needed code\n"
+    "                                               and can throw errors. With unsafe-warn warnings will be\n"
+    "                                               printed for dynamic call sites that might lead to such\n"
+    "                                               errors. In safe mode compile-time errors are given instead.\n"
 ;
 
 JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
@@ -284,6 +295,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_polly,
            opt_trace_compile,
            opt_trace_compile_timing,
+           opt_trace_dispatch,
            opt_math_mode,
            opt_worker,
            opt_bind_to,
@@ -309,7 +321,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_strip_ir,
            opt_heap_size_hint,
            opt_gc_threads,
-           opt_permalloc_pkgimg
+           opt_permalloc_pkgimg,
+           opt_trim,
     };
     static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:m:";
     static const struct option longopts[] = {
@@ -361,6 +374,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "polly",           required_argument, 0, opt_polly },
         { "trace-compile",   required_argument, 0, opt_trace_compile },
         { "trace-compile-timing",  no_argument, 0, opt_trace_compile_timing },
+        { "trace-dispatch",  required_argument, 0, opt_trace_dispatch },
         { "math-mode",       required_argument, 0, opt_math_mode },
         { "handle-signals",  required_argument, 0, opt_handle_signals },
         // hidden command line options
@@ -373,6 +387,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "strip-ir",        no_argument,       0, opt_strip_ir },
         { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg },
         { "heap-size-hint",  required_argument, 0, opt_heap_size_hint },
+        { "trim",  optional_argument, 0, opt_trim },
         { 0, 0, 0, 0 }
     };
 
@@ -816,6 +831,11 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         case opt_trace_compile_timing:
             jl_options.trace_compile_timing = 1;
             break;
+         case opt_trace_dispatch:
+            jl_options.trace_dispatch = strdup(optarg);
+            if (!jl_options.trace_dispatch)
+                jl_errorf("fatal error: failed to allocate memory: %s", strerror(errno));
+            break;
         case opt_math_mode:
             if (!strcmp(optarg,"ieee"))
                 jl_options.fast_math = JL_OPTIONS_FAST_MATH_OFF;
@@ -932,6 +952,18 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             else
                 jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg);
             break;
+        case opt_trim:
+            if (optarg == NULL || !strcmp(optarg,"safe"))
+                jl_options.trim = JL_TRIM_SAFE;
+            else if (!strcmp(optarg,"no"))
+                jl_options.trim = JL_TRIM_NO;
+            else if (!strcmp(optarg,"unsafe"))
+                jl_options.trim = JL_TRIM_UNSAFE;
+            else if (!strcmp(optarg,"unsafe-warn"))
+                jl_options.trim = JL_TRIM_UNSAFE_WARN;
+            else
+                jl_errorf("julia: invalid argument to --trim={safe|no|unsafe|unsafe-warn} (%s)", optarg);
+            break;
         default:
             jl_errorf("julia: unhandled option -- %c\n"
                       "This is a bug, please report it.", c);
diff --git a/src/jloptions.h b/src/jloptions.h
index aac2a64a373a8..e58797caace3c 100644
--- a/src/jloptions.h
+++ b/src/jloptions.h
@@ -38,6 +38,7 @@ typedef struct {
     int8_t can_inline;
     int8_t polly;
     const char *trace_compile;
+    const char *trace_dispatch;
     int8_t fast_math;
     int8_t worker;
     const char *cookie;
@@ -62,6 +63,7 @@ typedef struct {
     int8_t permalloc_pkgimg;
     uint64_t heap_size_hint;
     int8_t trace_compile_timing;
+    int8_t trim;
 } jl_options_t;
 
 #endif
diff --git a/src/jltypes.c b/src/jltypes.c
index 78ecb9231df7e..11f1d11a14edc 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -594,6 +594,52 @@ static int simple_subtype(jl_value_t *a, jl_value_t *b, int hasfree, int isUnion
     return 0;
 }
 
+// merge Union{Tuple{}, Tuple{T}, Tuple{T, T, Vararg{T}}} into Tuple{Vararg{T}}
+// assumes temp is already sorted by number of type parameters
+STATIC_INLINE void merge_vararg_unions(jl_value_t **temp, size_t nt)
+{
+    for (size_t i = nt-1; i > 0; i--) {
+        // match types of form Tuple{T, ..., Vararg{T}}
+        jl_value_t *tt = temp[i];
+        if (!(tt && jl_is_tuple_type(tt))) continue;
+        size_t nfields = jl_nparams(tt);
+        if (nfields <= 1) continue;
+        jl_value_t *va = jl_tparam(tt, nfields-1);
+        if (jl_vararg_kind(va) != JL_VARARG_UNBOUND) continue;
+        jl_value_t *t = jl_unwrap_vararg(va);
+        for (size_t j = 0; j < nfields-1; j++)
+            if (!jl_egal(jl_tparam(tt, j), t)) goto outer_loop;
+
+        // look for Tuple{T, T, ...} then Tuple{T, ...}, etc
+        size_t min_elements = nfields-1;
+        for (long j = i-1; j >= 0; j--) {
+            jl_value_t *ttj = temp[j];
+            if (!jl_is_tuple_type(ttj)) break;
+            size_t nfieldsj = jl_nparams(ttj);
+            if (nfieldsj >= min_elements) continue;
+            if (nfieldsj != min_elements-1) break;
+            for (size_t k = 0; k < nfieldsj; k++)
+                if (!jl_egal(jl_tparam(ttj, k), t)) goto inner_loop;
+
+            temp[j] = NULL;
+            min_elements--;
+ inner_loop:
+            continue;
+        }
+
+        if (min_elements == nfields-1) continue;
+        jl_value_t** params;
+        JL_GC_PUSHARGS(params, min_elements+1);
+        for (size_t j = 0; j < min_elements; j++)
+            params[j] = t;
+        params[min_elements] = va;
+        temp[i] = jl_apply_type((jl_value_t*)jl_tuple_type, params, min_elements+1);
+        JL_GC_POP();
+ outer_loop:
+        continue;
+    }
+}
+
 JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
 {
     if (n == 0)
@@ -625,6 +671,7 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
         }
     }
     isort_union(temp, nt);
+    merge_vararg_unions(temp, nt);
     jl_value_t **ptu = &temp[nt];
     *ptu = jl_bottom_type;
     int k;
@@ -730,6 +777,7 @@ jl_value_t *simple_union(jl_value_t *a, jl_value_t *b)
         }
     }
     isort_union(temp, nt);
+    merge_vararg_unions(temp, nt);
     temp[nt] = jl_bottom_type;
     size_t k;
     for (k = nt; k-- > 0; ) {
@@ -1559,6 +1607,118 @@ jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u)
     return t;
 }
 
+// Create a copy of type expression t where any occurrence of data type x is replaced by y.
+// If x does not occur in t, return t without any copy.
+// For example, jl_substitute_datatype(Foo{Bar}, Foo{T}, Qux{S}) is Qux{Bar}, with T and S
+// free type variables.
+// To substitute type variables, use jl_substitute_var instead.
+jl_value_t *jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y)
+{
+    if jl_is_datatype(t) {
+        jl_datatype_t *typ = (jl_datatype_t*)t;
+        // For datatypes call itself recursively on the parameters to form new parameters.
+        // Then, if typename(t) == typename(x), rewrap the wrapper of y around the new
+        // parameters. Otherwise, do the same around the wrapper of t.
+        // This ensures that the types and supertype are properly set.
+        // Start by check whether there is a parameter that needs replacing.
+        long i_firstnewparam = -1;
+        size_t nparams = jl_svec_len(typ->parameters);
+        jl_value_t *firstnewparam = NULL;
+        JL_GC_PUSH1(&firstnewparam);
+        for (size_t i = 0; i < nparams; i++) {
+            jl_value_t *param = NULL;
+            JL_GC_PUSH1(&param);
+            param = jl_svecref(typ->parameters, i);
+            firstnewparam = jl_substitute_datatype(param, x, y);
+            if (param != firstnewparam) {
+                i_firstnewparam = i;
+                JL_GC_POP();
+                break;
+            }
+            JL_GC_POP();
+        }
+        // If one of the parameters needs to be updated, or if the type name is that to
+        // substitute, create a new datataype
+        if (i_firstnewparam != -1 || typ->name == x->name) {
+            jl_datatype_t *uw = typ->name == x->name ? y : typ; // substitution occurs here
+            jl_value_t *wrapper = uw->name->wrapper;
+            jl_datatype_t *w = (jl_datatype_t*)jl_unwrap_unionall(wrapper);
+            jl_svec_t *sv = jl_alloc_svec_uninit(jl_svec_len(uw->parameters));
+            JL_GC_PUSH1(&sv);
+            jl_value_t **vals = jl_svec_data(sv);
+            // no JL_GC_PUSHARGS(vals, ...) since GC is already aware of sv
+            for (long i = 0; i < i_firstnewparam; i++) { // copy the identical parameters
+                vals[i] = jl_svecref(typ->parameters, i); // value
+            }
+            if (i_firstnewparam != -1) { // insert the first non-identical parameter
+                vals[i_firstnewparam] = firstnewparam;
+            }
+            for (size_t i = i_firstnewparam+1; i < nparams; i++) { // insert the remaining parameters
+                vals[i] = jl_substitute_datatype(jl_svecref(typ->parameters, i), x, y);
+            }
+            if (jl_is_tuple_type(wrapper)) {
+                // special case for tuples, since the wrapper (Tuple) does not have as
+                // many parameters as t (it only has a Vararg instead).
+                t = jl_apply_tuple_type(sv, 0);
+            } else {
+                t = jl_instantiate_type_in_env((jl_value_t*)w, (jl_unionall_t*)wrapper, vals);
+            }
+            JL_GC_POP();
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_unionall(t) { // recursively call itself on body and var bounds
+        jl_unionall_t* ut = (jl_unionall_t*)t;
+        jl_value_t *lb = NULL;
+        jl_value_t *ub = NULL;
+        jl_value_t *body = NULL;
+        JL_GC_PUSH3(&lb, &ub, &body);
+        lb = jl_substitute_datatype(ut->var->lb, x, y);
+        ub = jl_substitute_datatype(ut->var->ub, x, y);
+        body = jl_substitute_datatype(ut->body, x, y);
+        if (lb != ut->var->lb || ub != ut->var->ub) {
+            jl_tvar_t *newtvar = jl_new_typevar(ut->var->name, lb, ub);
+            JL_GC_PUSH1(&newtvar);
+            body = jl_substitute_var(body, ut->var, (jl_value_t*)newtvar);
+            t = jl_new_struct(jl_unionall_type, newtvar, body);
+            JL_GC_POP();
+        }
+        else if (body != ut->body) {
+            t = jl_new_struct(jl_unionall_type, ut->var, body);
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_uniontype(t) { // recursively call itself on a and b
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        jl_value_t *a = NULL;
+        jl_value_t *b = NULL;
+        JL_GC_PUSH2(&a, &b);
+        a = jl_substitute_datatype(u->a, x, y);
+        b = jl_substitute_datatype(u->b, x, y);
+        if (a != u->a || b != u->b) {
+            t = jl_new_struct(jl_uniontype_type, a, b);
+        }
+        JL_GC_POP();
+    }
+    else if jl_is_vararg(t) { // recursively call itself on T
+        jl_vararg_t *vt = (jl_vararg_t*)t;
+        if (vt->T) { // vt->T could be NULL
+            jl_value_t *rT = NULL;
+            JL_GC_PUSH1(&rT);
+            rT = jl_substitute_datatype(vt->T, x, y);
+            if (rT != vt->T) {
+                jl_task_t *ct = jl_current_task;
+                t = jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
+                jl_set_typetagof((jl_vararg_t *)t, jl_vararg_tag, 0);
+                ((jl_vararg_t *)t)->T = rT;
+                ((jl_vararg_t *)t)->N = vt->N;
+            }
+            JL_GC_POP();
+        }
+    }
+    return t;
+}
+
 static jl_value_t *lookup_type_stack(jl_typestack_t *stack, jl_datatype_t *tt, size_t ntp,
                                      jl_value_t **iparams)
 {
@@ -1913,7 +2073,7 @@ static jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *t, int check, int not
         t = normalize_unionalls(t);
         p = t;
         jl_value_t *tw = extract_wrapper(t);
-        if (tw && t != tw && jl_types_equal(t, tw))
+        if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw))
             t = tw;
         p = t;
         check = 0; // remember that checks are already done now
@@ -1997,7 +2157,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
             // normalize types equal to wrappers (prepare for Typeofwrapper)
             jl_value_t *tw = extract_wrapper(pi);
             if (tw && tw != pi && (tn != jl_type_typename || jl_typeof(pi) == jl_typeof(tw)) &&
-                    jl_types_equal(pi, tw)) {
+                    !jl_has_free_typevars(pi) && jl_types_equal(pi, tw)) {
                 iparams[i] = tw;
                 if (p) jl_gc_wb(p, tw);
             }
@@ -2669,7 +2829,7 @@ jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n, int check, int nothrow
             if (valid) {
                 t = normalize_unionalls(t);
                 jl_value_t *tw = extract_wrapper(t);
-                if (tw && t != tw && jl_types_equal(t, tw))
+                if (tw && t != tw && !jl_has_free_typevars(t) && jl_types_equal(t, tw))
                     t = tw;
             }
         }
@@ -2847,25 +3007,26 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_typename_type->name->mt = jl_nonfunction_mt;
     jl_typename_type->super = jl_any_type;
     jl_typename_type->parameters = jl_emptysvec;
-    jl_typename_type->name->n_uninitialized = 15 - 2;
-    jl_typename_type->name->names = jl_perm_symsvec(15, "name", "module",
+    jl_typename_type->name->n_uninitialized = 16 - 2;
+    jl_typename_type->name->names = jl_perm_symsvec(16, "name", "module",
                                                     "names", "atomicfields", "constfields",
                                                     "wrapper", "Typeofwrapper", "cache", "linearcache",
                                                     "mt", "partial",
                                                     "hash", "n_uninitialized",
                                                     "flags", // "abstract", "mutable", "mayinlinealloc",
-                                                    "max_methods");
+                                                    "max_methods", "constprop_heuristic");
     const static uint32_t typename_constfields[1] = { 0x00003a27 }; // (1<<0)|(1<<1)|(1<<2)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13) ; TODO: put back (1<<3)|(1<<4) in this list
     const static uint32_t typename_atomicfields[1] = { 0x00000180 }; // (1<<7)|(1<<8)
     jl_typename_type->name->constfields = typename_constfields;
     jl_typename_type->name->atomicfields = typename_atomicfields;
     jl_precompute_memoized_dt(jl_typename_type, 1);
-    jl_typename_type->types = jl_svec(15, jl_symbol_type, jl_any_type /*jl_module_type*/,
+    jl_typename_type->types = jl_svec(16, jl_symbol_type, jl_any_type /*jl_module_type*/,
                                       jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/,
                                       jl_type_type, jl_type_type, jl_simplevector_type, jl_simplevector_type,
                                       jl_methtable_type, jl_any_type,
                                       jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/,
                                       jl_any_type /*jl_uint8_type*/,
+                                      jl_any_type /*jl_uint8_type*/,
                                       jl_any_type /*jl_uint8_type*/);
 
     jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1);
@@ -2975,6 +3136,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_anytuple_type->layout = NULL;
 
     jl_typeofbottom_type->super = jl_wrap_Type(jl_bottom_type);
+    jl_typeofbottom_type->super->layout = jl_typeofbottom_type->layout; // the only abstract type with a layout
     jl_emptytuple_type = (jl_datatype_t*)jl_apply_tuple_type(jl_emptysvec, 0);
     jl_emptytuple = jl_gc_permobj(0, jl_emptytuple_type);
     jl_emptytuple_type->instance = jl_emptytuple;
@@ -3098,12 +3260,21 @@ void jl_init_types(void) JL_GC_DISABLED
     assert(jl_module_type->instance == NULL);
     jl_compute_field_offsets(jl_module_type);
 
+    jl_binding_partition_type =
+        jl_new_datatype(jl_symbol("BindingPartition"), core, jl_any_type, jl_emptysvec,
+                        jl_perm_symsvec(5, "restriction", "min_world", "max_world", "next", "reserved"),
+                        jl_svec(5, jl_uint64_type /* Special GC-supported union of Any and flags*/,
+                        jl_ulong_type, jl_ulong_type, jl_any_type/*jl_binding_partition_type*/, jl_ulong_type),
+                        jl_emptysvec, 0, 1, 0);
+    const static uint32_t binding_partition_atomicfields[] = { 0b01101 }; // Set fields 1, 3, 4 as atomic
+    jl_binding_partition_type->name->atomicfields = binding_partition_atomicfields;
+
     jl_binding_type =
         jl_new_datatype(jl_symbol("Binding"), core, jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(5, "value", "globalref", "owner", "ty", "flags"),
-                        jl_svec(5, jl_any_type, jl_any_type/*jl_globalref_type*/, jl_any_type/*jl_binding_type*/, jl_type_type, jl_uint8_type),
+                        jl_perm_symsvec(4, "globalref", "value", "partitions", "flags"),
+                        jl_svec(4, jl_any_type/*jl_globalref_type*/, jl_any_type, jl_binding_partition_type, jl_uint8_type),
                         jl_emptysvec, 0, 1, 0);
-    const static uint32_t binding_atomicfields[] = { 0x0015 }; // Set fields 1, 3, 4 as atomic
+    const static uint32_t binding_atomicfields[] = { 0x0005 }; // Set fields 1, 3 as atomic
     jl_binding_type->name->atomicfields = binding_atomicfields;
     const static uint32_t binding_constfields[] = { 0x0002 }; // Set fields 2 as constant
     jl_binding_type->name->constfields = binding_constfields;
@@ -3446,7 +3617,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             "backedges",
                             "cache",
                             "cache_with_orig",
-                            "precompiled"),
+                            "flags"),
                         jl_svec(7,
                             jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type),
                             jl_any_type,
@@ -3533,8 +3704,11 @@ void jl_init_types(void) JL_GC_DISABLED
                                        jl_emptysvec, 0, 0, 4);
 
     // all Kinds share the Type method table (not the nonfunction one)
-    jl_unionall_type->name->mt = jl_uniontype_type->name->mt = jl_datatype_type->name->mt =
-        jl_type_type_mt;
+    jl_unionall_type->name->mt =
+        jl_uniontype_type->name->mt =
+        jl_datatype_type->name->mt =
+        jl_typeofbottom_type->name->mt =
+            jl_type_type_mt;
 
     jl_intrinsic_type = jl_new_primitivetype((jl_value_t*)jl_symbol("IntrinsicFunction"), core,
                                              jl_builtin_type, jl_emptysvec, 32);
@@ -3609,6 +3783,8 @@ void jl_init_types(void) JL_GC_DISABLED
     XX(task);
     jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
     jl_svecset(jl_task_type->types, 0, listt);
+    const static uint32_t task_atomicfields[1] = {0x00001000}; // Set fields 13 as atomic
+    jl_task_type->name->atomicfields = task_atomicfields;
 
     tv = jl_svec2(tvar("A"), tvar("R"));
     jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv,
@@ -3639,6 +3815,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_svecset(jl_typename_type->types, 12, jl_int32_type);
     jl_svecset(jl_typename_type->types, 13, jl_uint8_type);
     jl_svecset(jl_typename_type->types, 14, jl_uint8_type);
+    jl_svecset(jl_typename_type->types, 15, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 4, jl_long_type);
     jl_svecset(jl_methtable_type->types, 5, jl_module_type);
     jl_svecset(jl_methtable_type->types, 6, jl_array_any_type);
@@ -3651,8 +3828,8 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_svecset(jl_method_instance_type->types, 4, jl_code_instance_type);
     jl_svecset(jl_code_instance_type->types, 15, jl_voidpointer_type);
     jl_svecset(jl_code_instance_type->types, 16, jl_voidpointer_type);
-    jl_svecset(jl_binding_type->types, 1, jl_globalref_type);
-    jl_svecset(jl_binding_type->types, 2, jl_binding_type);
+    jl_svecset(jl_binding_type->types, 0, jl_globalref_type);
+    jl_svecset(jl_binding_partition_type->types, 3, jl_binding_partition_type);
 
     jl_compute_field_offsets(jl_datatype_type);
     jl_compute_field_offsets(jl_typename_type);
@@ -3664,6 +3841,7 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_compute_field_offsets(jl_unionall_type);
     jl_compute_field_offsets(jl_simplevector_type);
     jl_compute_field_offsets(jl_symbol_type);
+    jl_compute_field_offsets(jl_binding_partition_type);
 
     // override ismutationfree for builtin types that are mutable for identity
     jl_string_type->ismutationfree = jl_string_type->isidentityfree = 1;
@@ -3755,7 +3933,7 @@ void post_boot_hooks(void)
     for (size_t i = 0; i < jl_svec_len(bindings); i++) {
         if (table[i] != jl_nothing) {
             jl_binding_t *b = (jl_binding_t*)table[i];
-            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+            jl_value_t *v = jl_get_binding_value(b);
             if (v) {
                 if (jl_is_unionall(v))
                     v = jl_unwrap_unionall(v);
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index 5636caa48e6e6..f1acb9c3250e1 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -225,6 +225,19 @@
                  (if lb (list lb ub) (list ub))
                  (if lb (list lb '(core Any)) '())))))
 
+(define (is-method? x)
+  (if (and (pair? x) (eq? (car x) 'method))
+      (let ((name (cadr x)))
+        (if (and (pair? name) (eq? (car name) 'globalref))
+            (let ((name (caddr name)))
+              (if (symbol? name)
+                  #t
+                  #f))
+            (if (symbol? name)
+                #t
+                #f)))
+      #f))
+
 (define (method-expr-name m)
   (let ((name (cadr m)))
       (cond ((globalref? name) (caddr name))
@@ -372,7 +385,7 @@
             (generator (if (expr-contains-p if-generated? body (lambda (x) (not (function-def? x))))
                            (let* ((gen    (generated-version body))
                                   (nongen (non-generated-version body))
-                                  (gname  (symbol (string (gensy) "#" (current-julia-module-counter))))
+                                  (gname  (symbol (string (gensy) "#" (current-julia-module-counter '()))))
                                   (gf     (make-generator-function gname names anames gen)))
                              (set! body (insert-after-meta
                                          nongen
@@ -512,7 +525,7 @@
                                         ""
                                         "#")
                                     (or und '_) "#"
-                                    (string (current-julia-module-counter)))))))
+                                    (string (current-julia-module-counter '())))))))
       ;; this is a hack: nest these statements inside a call so they get closure
       ;; converted together, allowing all needed types to be defined before any methods.
       `(call (core ifelse) (false) (false) (block
@@ -1251,7 +1264,7 @@
                    (list a)))
          ;; TODO: always use a specific special name like #anon# or _, then ignore
          ;; this as a local variable name.
-         (name (symbol (string "#" (current-julia-module-counter)))))
+         (name (symbol (string "#" (current-julia-module-counter '())))))
     (expand-forms
      `(block (local ,name)
              (function
@@ -2435,7 +2448,8 @@
      (let* ((argt  (something (list (expand-forms (cadr e)) #f)))
             (rt_lb (something (list (expand-forms (caddr e)) #f)))
             (rt_ub (something (list (expand-forms (cadddr e)) #f)))
-            (F     (caddddr e))
+            (allow-partial (caddddr e))
+            (F             (cadddddr e))
             (isva (let* ((arglist (function-arglist F))
                          (lastarg (and (pair? arglist) (last arglist))))
                     (if (and argt (any (lambda (arg)
@@ -2460,7 +2474,7 @@
        (let* ((argtype   (foldl (lambda (var ex) `(call (core UnionAll) ,var ,ex))
                                 (expand-forms `(curly (core Tuple) ,@argtypes))
                                 (reverse tvars))))
-         `(_opaque_closure ,(or argt argtype) ,rt_lb ,rt_ub ,isva ,(length argtypes) ,functionloc ,lam))))
+         `(_opaque_closure ,(or argt argtype) ,rt_lb ,rt_ub ,isva ,(length argtypes) ,allow-partial ,functionloc ,lam))))
 
    'block
    (lambda (e)
@@ -3536,9 +3550,9 @@ f(x) = yt(x)
 (define (clear-capture-bits vinfos)
   (map vinfo:not-capt vinfos))
 
-(define (convert-lambda lam fname interp capt-sp opaq)
+(define (convert-lambda lam fname interp capt-sp opaq parsed-method-stack)
   (let ((body (add-box-inits-to-body
-               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq (table) (vinfo-to-table (car (lam:vinfo lam)))))))
+               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq parsed-method-stack (table) (vinfo-to-table (car (lam:vinfo lam)))))))
     `(lambda ,(lam:args lam)
        (,(clear-capture-bits (car (lam:vinfo lam)))
         ()
@@ -3613,7 +3627,7 @@ f(x) = yt(x)
 ;; declared types.
 ;; when doing this, the original value needs to be preserved, to
 ;; ensure the expression `a=b` always returns exactly `b`.
-(define (convert-assignment var rhs0 fname lam interp opaq globals locals)
+(define (convert-assignment var rhs0 fname lam interp opaq parsed-method-stack globals locals)
   (cond
     ((symbol? var)
      (let* ((vi (get locals var #f))
@@ -3631,7 +3645,7 @@ f(x) = yt(x)
                                 (equal? rhs0 '(the_exception)))
                             rhs0
                             (make-ssavalue)))
-                  (rhs  (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq (table) locals) #t lam))
+                  (rhs  (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq parsed-method-stack (table) locals) #t lam))
                   (ex (cond (closed `(call (core setfield!)
                                            ,(if interp
                                                 `($ ,var)
@@ -3915,17 +3929,17 @@ f(x) = yt(x)
 (define (toplevel-preserving? e)
   (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse))))
 
-(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
+(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table)))
   (if toplevel
       (map (lambda (x)
              (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined
                                                   (and toplevel (toplevel-preserving? x))
-                                                  interp opaq globals locals))))
+                                                  interp opaq parsed-method-stack globals locals))))
                (if (null? (cdr tl))
                    (car tl)
                    `(block ,@(cdr tl) ,(car tl)))))
            exprs)
-      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals locals)) exprs)))
+      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq parsed-method-stack globals locals)) exprs)))
 
 (define (prepare-lambda! lam)
   ;; mark all non-arguments as assigned, since locals that are never assigned
@@ -3934,11 +3948,17 @@ f(x) = yt(x)
             (list-tail (car (lam:vinfo lam)) (length (lam:args lam))))
   (lambda-optimize-vars! lam))
 
-(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)) (locals (table)))
+;; must start with a hash and second character must be numeric
+(define (anon-function-name? str)
+  (and (>= (string-length str) 2)
+       (char=? (string.char str 0) #\#)
+       (char-numeric? (string.char str 1))))
+
+(define (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack (globals (table)) (locals (table)))
   (if (and (not lam)
            (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure)))))
       (if (atom? e) e
-          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals)))
+          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
       (cond
        ((symbol? e)
         (define (new-undef-var name)
@@ -3957,7 +3977,10 @@ f(x) = yt(x)
                  (val (if (equal? typ '(core Any))
                           val
                           `(call (core typeassert) ,val
-                                 ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals locals)))))
+                                 ,(let ((convt (cl-convert typ fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
+                                    (if (or (symbol-like? convt) (quoted? convt))
+                                        convt
+                                        (renumber-assigned-ssavalues convt)))))))
             `(block
                ,@(if (eq? box access) '() `((= ,access ,box)))
                ,undefcheck
@@ -3989,8 +4012,8 @@ f(x) = yt(x)
            e)
           ((=)
            (let ((var (cadr e))
-                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals locals)))
-             (convert-assignment var rhs fname lam interp opaq globals locals)))
+                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
+             (convert-assignment var rhs fname lam interp opaq parsed-method-stack globals locals)))
           ((local-def) ;; make new Box for local declaration of defined variable
            (let ((vi (get locals (cadr e) #f)))
              (if (and vi (vinfo:asgn vi) (vinfo:capt vi))
@@ -4028,7 +4051,8 @@ f(x) = yt(x)
           ((_opaque_closure)
            (let* ((isva  (car (cddddr e)))
                   (nargs (cadr (cddddr e)))
-                  (functionloc (caddr (cddddr e)))
+                  (allow-partial (caddr (cddddr e)))
+                  (functionloc   (cadddr (cddddr e)))
                   (lam2  (last e))
                   (vis   (lam:vinfo lam2))
                   (cvs   (map car (cadr vis))))
@@ -4040,8 +4064,8 @@ f(x) = yt(x)
                                            v)))
                                    cvs)))
                `(new_opaque_closure
-                 ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) (true)
-                 (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs)))
+                 ,(cadr e) ,(or (caddr e) '(call (core apply_type) (core Union))) ,(or (cadddr e) '(core Any)) ,allow-partial
+                 (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs) parsed-method-stack))
                  ,@var-exprs))))
           ((method)
            (let* ((name  (method-expr-name e))
@@ -4055,7 +4079,7 @@ f(x) = yt(x)
                   (sp-inits (if (or short (not (eq? (car sig) 'block)))
                                 '()
                                 (map-cl-convert (butlast (cdr sig))
-                                                fname lam namemap defined toplevel interp opaq globals locals)))
+                                                fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
                   (sig      (and sig (if (eq? (car sig) 'block)
                                          (last sig)
                                          sig))))
@@ -4082,22 +4106,22 @@ f(x) = yt(x)
                                           ;; anonymous functions with keyword args generate global
                                           ;; functions that refer to the type of a local function
                                           (rename-sig-types sig namemap)
-                                          fname lam namemap defined toplevel interp opaq globals locals)
+                                          fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)
                                   ,(let ((body (add-box-inits-to-body
                                                 lam2
-                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq (table)
+                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq parsed-method-stack (table)
                                                             (vinfo-to-table (car (lam:vinfo lam2)))))))
                                      `(lambda ,(cadr lam2)
                                         (,(clear-capture-bits (car vis))
                                          ,@(cdr vis))
                                         ,body)))))
                        (else
-                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f)))
+                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f parsed-method-stack)))
                                (top-stmts (cdr exprs))
                                (newlam    (compact-and-renumber (linearize (car exprs)) 'none 0)))
                           `(toplevel-butfirst
                             (block ,@sp-inits
-                                   (method ,(cadr e) ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals locals)
+                                   (method ,(cadr e) ,(cl-convert sig fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)
                                            ,(julia-bq-macro newlam)))
                             ,@top-stmts))))
 
@@ -4106,9 +4130,11 @@ f(x) = yt(x)
                         (type-name  (or (get namemap name #f)
                                         (and name
                                              (symbol (string (if (= (string.char (string name) 0) #\#)
-                                                                 ""
-                                                                 "#")
-                                                             name "#" (current-julia-module-counter))))))
+                                                                  (if (anon-function-name? (string name))
+                                                                    (string "#" (current-julia-module-counter parsed-method-stack))
+                                                                    name)
+                                                                  (string "#" name))
+                                                              "#" (current-julia-module-counter parsed-method-stack))))))
                         (alldefs (expr-find-all
                                   (lambda (ex) (and (length> ex 2) (eq? (car ex) 'method)
                                                     (not (eq? ex e))
@@ -4200,12 +4226,12 @@ f(x) = yt(x)
                                (append (map (lambda (gs tvar)
                                               (make-assignment gs `(call (core TypeVar) ',tvar (core Any))))
                                             closure-param-syms closure-param-names)
-                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals locals)
+                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)
                                                  ,(convert-lambda lam2
                                                                   (if iskw
                                                                       (caddr (lam:args lam2))
                                                                       (car (lam:args lam2)))
-                                                                  #f closure-param-names #f)))))))
+                                                                  #f closure-param-names #f parsed-method-stack)))))))
                         (mk-closure  ;; expression to make the closure
                          (let* ((var-exprs (map (lambda (v)
                                                   (let ((cv (assq v (cadr (lam:vinfo lam)))))
@@ -4239,7 +4265,7 @@ f(x) = yt(x)
                        (begin
                          (put! defined name #t)
                          `(toplevel-butfirst
-                           ,(convert-assignment name mk-closure fname lam interp opaq globals locals)
+                           ,(convert-assignment name mk-closure fname lam interp opaq parsed-method-stack globals locals)
                            ,@typedef
                            ,@(map (lambda (v) `(moved-local ,v)) moved-vars)
                            ,@sp-inits
@@ -4253,14 +4279,14 @@ f(x) = yt(x)
                                        (table)
                                        (table)
                                        (null? (cadr e)) ;; only toplevel thunks have 0 args
-                                       interp opaq globals (vinfo-to-table (car (lam:vinfo e))))))
+                                       interp opaq parsed-method-stack globals (vinfo-to-table (car (lam:vinfo e))))))
              `(lambda ,(cadr e)
                 (,(clear-capture-bits (car (lam:vinfo e)))
                  () ,@(cddr (lam:vinfo e)))
                 (block ,@body))))
           ;; remaining `::` expressions are type assertions
           ((|::|)
-           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals locals))
+           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))
           ;; remaining `decl` expressions are only type assertions if the
           ;; argument is global or a non-symbol.
           ((decl)
@@ -4278,13 +4304,20 @@ f(x) = yt(x)
                              (globaldecl ,ref ,(caddr e))
                              (null)))
                          `(call (core typeassert) ,@(cdr e))))
-                   fname lam namemap defined toplevel interp opaq globals locals))))
+                   fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))))
           ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars
           ((with-static-parameters)
-           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals locals))
+           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))
           (else
            (cons (car e)
-                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals locals))))))))
+                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals))))))))
+
+;; wrapper for `cl-convert-`
+(define (cl-convert e fname lam namemap defined toplevel interp opaq (parsed-method-stack '()) (globals (table)) (locals (table)))
+  (if (is-method? e)
+      (let ((name (method-expr-name e)))
+        (cl-convert- e fname lam namemap defined toplevel interp opaq (cons name parsed-method-stack) globals locals))
+      (cl-convert- e fname lam namemap defined toplevel interp opaq parsed-method-stack globals locals)))
 
 (define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f))
 
@@ -4968,6 +5001,10 @@ f(x) = yt(x)
             ((≔ ⩴ ≕ :=)
              (error (string "unsupported assignment operator \"" (deparse (car e)) "\"")))
 
+            ;; bare :escape
+            ((escape)
+             (error (string "\"esc(...)\" used outside of macro expansion")))
+
             ((error)
              (error (cadr e)))
             (else
diff --git a/src/julia.expmap.in b/src/julia.expmap.in
index e5f9ee890205f..29366f6296a85 100644
--- a/src/julia.expmap.in
+++ b/src/julia.expmap.in
@@ -5,8 +5,8 @@
     asprintf;
     bitvector_*;
     ios_*;
-    arraylist_grow;
-    small_arraylist_grow;
+    arraylist_*;
+    small_arraylist_*;
     jl_*;
     ijl_*;
     _jl_mutex_*;
diff --git a/src/julia.h b/src/julia.h
index b4ff97daae150..651f313021f95 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -3,24 +3,6 @@
 #ifndef JULIA_H
 #define JULIA_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern int mmtk_object_is_managed_by_mmtk(void* addr);
-extern unsigned char mmtk_pin_object(void* obj);
-// FIXME: Pinning objects that get hashed in the ptrhash table
-// until we implement address space hashing.
-#ifdef MMTK_GC
-#define PTRHASH_PIN(key) mmtk_pin_object(key);
-#else
-#define PTRHASH_PIN(key)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
 #if defined(JL_LIBRARY_EXPORTS_INTERNAL) || defined(JL_LIBRARY_EXPORTS_CODEGEN)
 #define JL_LIBRARY_EXPORTS
 #endif
@@ -45,22 +27,10 @@ extern unsigned char mmtk_pin_object(void* obj);
 
 #include <setjmp.h>
 #ifndef _OS_WINDOWS_
-#  define jl_jmp_buf sigjmp_buf
-#  if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_)
-#    define MAX_ALIGN 8
-#  elif defined(_CPU_AARCH64_)
-// int128 is 16 bytes aligned on aarch64
-#    define MAX_ALIGN 16
-#  elif defined(_P64)
-// Generically we assume MAX_ALIGN is sizeof(void*)
-#    define MAX_ALIGN 8
-#  else
-#    define MAX_ALIGN 4
-#  endif
+    #define jl_jmp_buf sigjmp_buf
 #else
-#  include "win32_ucontext.h"
-#  define jl_jmp_buf jmp_buf
-#  define MAX_ALIGN 8
+    #include "win32_ucontext.h"
+    #define jl_jmp_buf jmp_buf
 #endif
 
 // Define the largest size (bytes) of a properly aligned object that the
@@ -100,6 +70,7 @@ typedef struct _jl_tls_states_t *jl_ptls_t;
 #ifdef JL_LIBRARY_EXPORTS
 #include "uv.h"
 #endif
+#include "gc-interface.h"
 #include "julia_atomics.h"
 #include "julia_threads.h"
 #include "julia_assert.h"
@@ -108,6 +79,15 @@ typedef struct _jl_tls_states_t *jl_ptls_t;
 extern "C" {
 #endif
 
+// object pinning  ------------------------------------------------------------
+
+// FIXME: Pinning objects that get hashed in the ptrhash table
+// until we implement address space hashing.
+#define PTRHASH_PIN(key) jl_gc_pin_object(key);
+
+// Called when pinning objects that would cause an error if moved
+#define PTR_PIN(key) jl_gc_pin_object(key);
+
 // core data types ------------------------------------------------------------
 
 // the common fields are hidden before the pointer, but the following macro is
@@ -263,6 +243,7 @@ JL_DLLEXPORT extern const jl_callptr_t jl_f_opaque_closure_call_addr;
 JL_DLLEXPORT extern const jl_callptr_t jl_fptr_wait_for_compiled_addr;
 
 typedef struct _jl_line_info_node_t {
+    JL_DATA_TYPE
     struct _jl_module_t *module;
     jl_value_t *method; // may contain a jl_symbol, jl_method_t, or jl_method_instance_t
     jl_sym_t *file;
@@ -300,15 +281,17 @@ typedef union __jl_purity_overrides_t {
         uint16_t ipo_noub                : 1;
         uint16_t ipo_noub_if_noinbounds  : 1;
         uint16_t ipo_consistent_overlay  : 1;
+        uint16_t ipo_nortcall            : 1;
     } overrides;
     uint16_t bits;
 } _jl_purity_overrides_t;
 
-#define NUM_EFFECTS_OVERRIDES 10
+#define NUM_EFFECTS_OVERRIDES 11
 #define NUM_IR_FLAGS 13
 
 // This type describes a single function body
 typedef struct _jl_code_info_t {
+    JL_DATA_TYPE
     // ssavalue-indexed arrays of properties:
     jl_array_t *code;  // Any array of statements
     jl_debuginfo_t *debuginfo; // Table of edge data for each statement
@@ -317,16 +300,15 @@ typedef struct _jl_code_info_t {
         // 1 << 0 = inbounds region
         // 1 << 1 = callsite inline region
         // 1 << 2 = callsite noinline region
-        // 1 << 3 = throw block
-        // 1 << 4 = refined statement
-        // 1 << 5 = :consistent
-        // 1 << 6 = :effect_free
-        // 1 << 7 = :nothrow
-        // 1 << 8 = :terminates
-        // 1 << 9 = :noub
-        // 1 << 10 = :effect_free_if_inaccessiblememonly
-        // 1 << 11 = :inaccessiblemem_or_argmemonly
-        // 1 << 12-19 = callsite effects overrides
+        // 1 << 3 = refined statement
+        // 1 << 4 = :consistent
+        // 1 << 5 = :effect_free
+        // 1 << 6 = :nothrow
+        // 1 << 7 = :terminates
+        // 1 << 8 = :noub
+        // 1 << 9 = :effect_free_if_inaccessiblememonly
+        // 1 << 10 = :inaccessiblemem_or_argmemonly
+        // 1 << 11-19 = callsite effects overrides
     // miscellaneous data:
     jl_array_t *slotnames; // names of local variables
     jl_array_t *slotflags;  // local var bit flags
@@ -437,8 +419,14 @@ struct _jl_method_instance_t {
     jl_array_t *backedges; // list of method-instances which call this method-instance; `invoke` records (invokesig, caller) pairs
     _Atomic(struct _jl_code_instance_t*) cache;
     uint8_t cache_with_orig; // !cache_with_specTypes
-    _Atomic(uint8_t) precompiled; // true if this instance was generated by an explicit `precompile(...)` call
+
+    // flags for this method instance
+    //   bit 0: generated by an explicit `precompile(...)`
+    //   bit 1: dispatched
+    _Atomic(uint8_t) flags;
 };
+#define JL_MI_FLAGS_MASK_PRECOMPILED    0x01
+#define JL_MI_FLAGS_MASK_DISPATCHED     0x02
 
 // OpaqueClosure
 typedef struct _jl_opaque_closure_t {
@@ -549,6 +537,7 @@ typedef struct {
     uint8_t mayinlinealloc:1;
     uint8_t _reserved:5;
     uint8_t max_methods; // override for inference's max_methods setting (0 = no additional limit or relaxation)
+    uint8_t constprop_heustic; // override for inference's constprop heuristic
 } jl_typename_t;
 
 typedef struct {
@@ -639,19 +628,84 @@ typedef struct _jl_weakref_t {
     jl_value_t *value;
 } jl_weakref_t;
 
+enum jl_partition_kind {
+    // Constant: This binding partition is a constant declared using `const`
+    //  ->restriction holds the constant value
+    BINDING_KIND_CONST        = 0x0,
+    // Import Constant: This binding partition is a constant declared using `import A`
+    //  ->restriction holds the constant value
+    BINDING_KIND_CONST_IMPORT = 0x1,
+    // Global: This binding partition is a global variable.
+    //  -> restriction holds the type restriction
+    BINDING_KIND_GLOBAL       = 0x2,
+    // Implicit: The binding was implicitly imported from a `using`'d module.
+    //  ->restriction holds the imported binding
+    BINDING_KIND_IMPLICIT     = 0x3,
+    // Explicit: The binding was explicitly `using`'d by name
+    //  ->restriction holds the imported binding
+    BINDING_KIND_EXPLICIT     = 0x4,
+    // Imported: The binding was explicitly `import`'d by name
+    //  ->restriction holds the imported binding
+    BINDING_KIND_IMPORTED     = 0x5,
+    // Failed: We attempted to import the binding, but the import was ambiguous
+    //  ->restriction is NULL.
+    BINDING_KIND_FAILED       = 0x6,
+    // Declared: The binding was declared using `global` or similar
+    //  ->restriction is NULL.
+    BINDING_KIND_DECLARED     = 0x7,
+    // Guard: The binding was looked at, but no global or import was resolved at the time
+    //  ->restriction is NULL.
+    BINDING_KIND_GUARD        = 0x8
+};
+
+#ifdef _P64
+// Union of a ptr and a 3 bit field.
+typedef uintptr_t jl_ptr_kind_union_t;
+#else
+typedef struct __attribute__((aligned(8))) { jl_value_t *val; size_t kind; } jl_ptr_kind_union_t;
+#endif
+typedef struct __attribute__((aligned(8))) _jl_binding_partition_t {
+    JL_DATA_TYPE
+    /* union {
+     *   // For ->kind == BINDING_KIND_GLOBAL
+     *   jl_value_t *type_restriction;
+     *   // For ->kind == BINDING_KIND_CONST(_IMPORT)
+     *   jl_value_t *constval;
+     *   // For ->kind in (BINDING_KIND_IMPLICIT, BINDING_KIND_EXPLICIT, BINDING_KIND_IMPORT)
+     *   jl_binding_t *imported;
+     * } restriction;
+     *
+     * Currently: Low 3 bits hold ->kind on _P64 to avoid needing >8 byte atomics
+     *
+     * This field is updated atomically with both kind and restriction. The following
+     * transitions are allowed and modeled by the system:
+     *
+     *  GUARD -> any
+     *  (DECLARED, FAILED) -> any non-GUARD
+     *  IMPLICIT -> {EXPLICIT, IMPORTED} (->restriction unchanged only)
+     *
+     * In addition, we permit (with warning about undefined behavior) changing the restriction
+     * pointer for CONST(_IMPORT).
+     *
+     * All other kind or restriction transitions are disallowed.
+     */
+    _Atomic(jl_ptr_kind_union_t) restriction;
+    size_t min_world;
+    _Atomic(size_t) max_world;
+    _Atomic(struct _jl_binding_partition_t*) next;
+    size_t reserved; // Reserved for ->kind. Currently this holds the low bits of ->restriction during serialization
+} jl_binding_partition_t;
+
 typedef struct _jl_binding_t {
     JL_DATA_TYPE
-    _Atomic(jl_value_t*) value;
     jl_globalref_t *globalref;  // cached GlobalRef for this binding
-    _Atomic(struct _jl_binding_t*) owner;  // for individual imported bindings (NULL until 'resolved')
-    _Atomic(jl_value_t*) ty;  // binding type
-    uint8_t constp:1;
+    _Atomic(jl_value_t*) value;
+    _Atomic(jl_binding_partition_t*) partitions;
+    uint8_t declared:1;
     uint8_t exportp:1; // `public foo` sets `publicp`, `export foo` sets both `publicp` and `exportp`
     uint8_t publicp:1; // exportp without publicp is not allowed.
-    uint8_t imported:1;
-    uint8_t usingfailed:1;
     uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
-    uint8_t padding:1;
+    uint8_t padding:3;
 } jl_binding_t;
 
 typedef struct {
@@ -665,6 +719,8 @@ typedef struct _jl_module_t {
     struct _jl_module_t *parent;
     _Atomic(jl_svec_t*) bindings;
     _Atomic(jl_genericmemory_t*) bindingkeyset; // index lookup by name into bindings
+    jl_sym_t *file;
+    int32_t line;
     // hidden fields:
     arraylist_t usings;  // modules with all bindings potentially imported
     jl_uuid_t build_id;
@@ -681,6 +737,7 @@ typedef struct _jl_module_t {
 } jl_module_t;
 
 struct _jl_globalref_t {
+    JL_DATA_TYPE
     jl_module_t *mod;
     jl_sym_t *name;
     jl_binding_t *binding;
@@ -810,7 +867,7 @@ static inline jl_value_t *jl_to_typeof(uintptr_t t)
     return (jl_value_t*)t;
 }
 #else
-extern JL_DLLEXPORT jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
+extern JL_HIDDEN jl_datatype_t *ijl_small_typeof[(jl_max_tags << 4) / sizeof(jl_datatype_t*)];
 static inline jl_value_t *jl_to_typeof(uintptr_t t)
 {
     if (t < (jl_max_tags << 4))
@@ -943,6 +1000,7 @@ extern JL_DLLIMPORT jl_value_t *jl_memoryref_uint8_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_value_t *jl_memoryref_any_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_expr_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_binding_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_binding_partition_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_linenumbernode_type JL_GLOBALLY_ROOTED;
 extern JL_DLLIMPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED;
@@ -1064,36 +1122,14 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT;
 
 #endif
 
-JL_DLLEXPORT int jl_gc_enable(int on);
-JL_DLLEXPORT int jl_gc_is_enabled(void);
-
-typedef enum {
-    JL_GC_AUTO = 0,         // use heuristics to determine the collection type
-    JL_GC_FULL = 1,         // force a full collection
-    JL_GC_INCREMENTAL = 2,  // force an incremental collection
-} jl_gc_collection_t;
-
-JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t);
-
 JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_add_quiescent(jl_ptls_t ptls, void **v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_finalize(jl_value_t *o);
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
-JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz);
 JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, struct _jl_task_t *owner) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz);
-JL_DLLEXPORT void jl_gc_use(jl_value_t *a);
-// Set GC memory trigger in bytes for greedy memory collecting
-JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem);
-JL_DLLEXPORT uint64_t jl_gc_get_max_memory(void);
-
-JL_DLLEXPORT void jl_clear_malloc_data(void);
 
 // GC write barriers
-JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const void *stored, jl_datatype_t *dt) JL_NOTSAFEPOINT;
-
 #ifndef MMTK_GC
 STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
 {
@@ -1124,7 +1160,6 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
     if (ly->npointers)
         jl_gc_queue_multiroot((jl_value_t*)parent, ptr, dt);
 }
-
 #else  // MMTK_GC
 STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT;
 
@@ -1144,7 +1179,6 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
 }
 #endif // MMTK_GC
 
-JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
 JL_DLLEXPORT void jl_gc_safepoint(void);
 JL_DLLEXPORT int jl_safepoint_suspend_thread(int tid, int waitstate);
 JL_DLLEXPORT void jl_safepoint_suspend_all_threads(struct _jl_task_t *ct);
@@ -1532,6 +1566,7 @@ static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
 #define jl_is_slotnumber(v)  jl_typetagis(v,jl_slotnumber_type)
 #define jl_is_expr(v)        jl_typetagis(v,jl_expr_type)
 #define jl_is_binding(v)     jl_typetagis(v,jl_binding_type)
+#define jl_is_binding_partition(v) jl_typetagis(v,jl_binding_partition_type)
 #define jl_is_globalref(v)   jl_typetagis(v,jl_globalref_type)
 #define jl_is_gotonode(v)    jl_typetagis(v,jl_gotonode_type)
 #define jl_is_gotoifnot(v)   jl_typetagis(v,jl_gotoifnot_type)
@@ -1830,10 +1865,9 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_gensym(void);
 JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len);
 JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void);
-JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
-                                                 jl_module_t *module,
-                                                 _Atomic(jl_value_t*) *bp,
-                                                 jl_binding_t *bnd);
+JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name);
 JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module);
 JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo, size_t world, jl_code_instance_t **cache);
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src);
@@ -1994,8 +2028,8 @@ JL_DLLEXPORT jl_value_t *jl_checked_swap(jl_binding_t *b, jl_module_t *mod, jl_s
 JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *expected, jl_value_t *rhs);
 JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs);
 JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED);
-JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var);
-JL_DLLEXPORT void jl_declare_constant_val(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val);
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_module_t *mod, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED, enum jl_partition_kind) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from);
 JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s);
 JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname);
@@ -2301,11 +2335,7 @@ typedef struct _jl_task_t {
     // current exception handler
     jl_handler_t *eh;
     // saved thread state
-    jl_ucontext_t ctx;
-    void *stkbuf; // malloc'd memory (either copybuf or stack)
-    size_t bufsz; // actual sizeof stkbuf
-    unsigned int copy_stack:31; // sizeof stack for copybuf
-    unsigned int started:1;
+    jl_ucontext_t ctx; // pointer into stkbuf, if suspended
 } jl_task_t;
 
 #define JL_TASK_STATE_RUNNABLE 0
@@ -2318,7 +2348,6 @@ JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow(void);
-JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct);
 JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
@@ -2585,6 +2614,11 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT;
 #define JL_OPTIONS_USE_PKGIMAGES_YES 1
 #define JL_OPTIONS_USE_PKGIMAGES_NO 0
 
+#define JL_TRIM_NO 0
+#define JL_TRIM_SAFE 1
+#define JL_TRIM_UNSAFE 2
+#define JL_TRIM_UNSAFE_WARN 3
+
 // Version information
 #include <julia_version.h> // Generated file
 
@@ -2632,21 +2666,21 @@ typedef struct {
     int gcstack_arg; // Pass the ptls value as an argument with swiftself
 
     int use_jlplt; // Whether to use the Julia PLT mechanism or emit symbols directly
+    int trim; // can we emit dynamic dispatches?
     // Cache access. Default: jl_rettype_inferred_native.
     jl_codeinstance_lookup_t lookup;
 } jl_cgparams_t;
 extern JL_DLLEXPORT int jl_default_debug_info_kind;
+extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
 
 typedef struct {
     int emit_metadata;
 } jl_emission_params_t;
 
 #ifdef MMTK_GC
+
 extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
 extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
-extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator);
-
-extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
 
 // These need to be constants.
 
@@ -2662,12 +2696,14 @@ extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
 #define MMTK_DEFAULT_IMMIX_ALLOCATOR (0)
 #define MMTK_IMMORTAL_BUMP_ALLOCATOR (0)
 
+extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS;
+
 // Directly call into MMTk for write barrier (debugging only)
 STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT
 {
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
-    mmtk_object_reference_write_post(&ptls->mmtk_mutator, parent, ptr);
+    mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr);
 }
 
 // Inlined fastpath
@@ -2681,7 +2717,7 @@ STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSA
         if (((byte_val >> shift) & 1) == 1) {
             jl_task_t *ct = jl_current_task;
             jl_ptls_t ptls = ct->ptls;
-            mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr);
+            mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr);
         }
     }
 }
@@ -2691,54 +2727,6 @@ STATIC_INLINE void mmtk_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOI
     mmtk_gc_wb_fast(parent, ptr);
 }
 
-#define MMTK_MIN_ALIGNMENT 4
-// MMTk assumes allocation size is aligned to min alignment.
-STATIC_INLINE size_t mmtk_align_alloc_sz(size_t sz) JL_NOTSAFEPOINT
-{
-    return (sz + MMTK_MIN_ALIGNMENT - 1) & ~(MMTK_MIN_ALIGNMENT - 1);
-}
-
-STATIC_INLINE void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* cursor, uintptr_t limit, size_t size, size_t align, size_t offset, int allocator) {
-    intptr_t delta = (-offset - *cursor) & (align - 1);
-    uintptr_t result = *cursor + (uintptr_t)delta;
-
-    if (__unlikely(result + size > limit)) {
-        return (void*) mmtk_alloc(mutator, size, align, offset, allocator);
-    } else{
-        *cursor = result + size;
-        return (void*)result;
-    }
-}
-
-STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
-    ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR];
-    return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0);
-}
-
-STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
-    // We do not need post alloc for immix objects in immix/stickyimmix
-}
-
-STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) {
-    BumpAllocator* allocator = &mutator->allocators.bump_pointer[MMTK_IMMORTAL_BUMP_ALLOCATOR];
-    return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1);
-}
-
-STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
-    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
-        intptr_t addr = (intptr_t) obj;
-        uint8_t* meta_addr = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (addr >> 6);
-        intptr_t shift = (addr >> 3) & 0b111;
-        while(1) {
-            uint8_t old_val = *meta_addr;
-            uint8_t new_val = old_val | (1 << shift);
-            if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) {
-                break;
-            }
-        }
-    }
-}
-
 #endif
 
 #ifdef __cplusplus
diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h
index 1c0929717b293..1f35d3693fefd 100644
--- a/src/julia_fasttls.h
+++ b/src/julia_fasttls.h
@@ -22,14 +22,9 @@ extern "C" {
 
 typedef struct _jl_gcframe_t jl_gcframe_t;
 
-#if defined(_OS_DARWIN_)
-#include <pthread.h>
-typedef void *(jl_get_pgcstack_func)(pthread_key_t); // aka typeof(pthread_getspecific)
-#else
 typedef jl_gcframe_t **(jl_get_pgcstack_func)(void);
-#endif
 
-#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
+#if !defined(_OS_WINDOWS_)
 #define JULIA_DEFINE_FAST_TLS                                                                   \
 static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec;  \
 JL_DLLEXPORT _Atomic(char) jl_pgcstack_static_semaphore;                                        \
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 530dc3db8e567..6fd537ed6baf8 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -3,22 +3,6 @@
 #ifndef JL_INTERNAL_H
 #define JL_INTERNAL_H
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern int mmtk_object_is_managed_by_mmtk(void* addr);
-extern unsigned char mmtk_pin_object(void* obj);
-#ifdef MMTK_GC
-#define PTR_PIN(key) mmtk_pin_object(key);
-#else
-#define PTR_PIN(key)
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
 #include "options.h"
 #include "julia_assert.h"
 #include "julia_locks.h"
@@ -29,10 +13,13 @@ extern unsigned char mmtk_pin_object(void* obj);
 #include "support/strtod.h"
 #include "gc-alloc-profiler.h"
 #include "support/rle.h"
+#include <ctype.h>
 #include <stdint.h>
 #include <uv.h>
 #include <llvm-c/Types.h>
 #include <llvm-c/Orc.h>
+#include <llvm-version.h>
+
 #if !defined(_WIN32)
 #include <unistd.h>
 #else
@@ -79,7 +66,8 @@ static inline void asan_unpoison_task_stack(jl_task_t *ct, jl_jmp_buf *buf)
        that we're resetting to. The idea is to remove the poison from the frames
        that we're skipping over, since they won't be unwound. */
     uintptr_t top = jmpbuf_sp(buf);
-    uintptr_t bottom = (uintptr_t)ct->stkbuf;
+    uintptr_t bottom = (uintptr_t)(ct->ctx.copy_stack ? (char*)ct->ptls->stackbase  - ct->ptls->stacksize : (char*)ct->ctx.stkbuf);
+    //uintptr_t bottom = (uintptr_t)&top;
     __asan_unpoison_stack_memory(bottom, top - bottom);
 }
 static inline void asan_unpoison_stack_memory(uintptr_t addr, size_t size) {
@@ -114,6 +102,26 @@ JL_DLLIMPORT void __tsan_destroy_fiber(void *fiber);
 JL_DLLIMPORT void __tsan_switch_to_fiber(void *fiber, unsigned flags);
 #endif
 
+#ifndef _OS_WINDOWS_
+    #if defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_WASM_)
+        #define MAX_ALIGN 8
+    #elif defined(_CPU_AARCH64_) || (JL_LLVM_VERSION >= 180000 && (defined(_CPU_X86_64_) || defined(_CPU_X86_)))
+    // int128 is 16 bytes aligned on aarch64 and on x86 with LLVM >= 18
+        #define MAX_ALIGN 16
+    #elif defined(_P64)
+    // Generically we assume MAX_ALIGN is sizeof(void*)
+        #define MAX_ALIGN 8
+    #else
+        #define MAX_ALIGN 4
+    #endif
+#else
+    #if JL_LLVM_VERSION >= 180000
+        #define MAX_ALIGN 16
+    #else
+        #define MAX_ALIGN 8
+    #endif
+#endif
+
 #ifndef alignof
 #  ifndef __cplusplus
 #    ifdef __GNUC__
@@ -327,7 +335,7 @@ static inline void memassign_safe(int hasptr, char *dst, const jl_value_t *src,
     memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb);
 }
 
-// -- gc.c -- //
+// -- GC -- //
 
 #define GC_CLEAN  0 // freshly allocated
 #define GC_MARKED 1 // reachable and young
@@ -359,28 +367,17 @@ extern jl_function_t *jl_typeinf_func JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT size_t jl_typeinf_world;
 extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
 
+void free_stack(void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT;
+
 JL_DLLEXPORT extern int jl_lineno;
 JL_DLLEXPORT extern const char *jl_filename;
 
-jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset,
+jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset,
                                    int osize);
 jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
-#ifdef MMTK_GC
-JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void* ty);
-JL_DLLIMPORT jl_value_t *jl_mmtk_gc_alloc_big(jl_ptls_t ptls, size_t allocsz);
-JL_DLLIMPORT extern void mmtk_post_alloc(void* mutator, void* obj, size_t bytes, int allocator);
-JL_DLLIMPORT extern void mmtk_initialize_collection(void* tls);
-#endif // MMTK_GC
 JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
-extern uv_mutex_t gc_perm_lock;
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
-    unsigned align, unsigned offset) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero,
-    unsigned align, unsigned offset) JL_NOTSAFEPOINT;
 void gc_sweep_sysimg(void);
 
-void jl_gc_notify_image_load(const char* img_data, size_t len);
-void jl_gc_notify_image_alloc(char* img_data, size_t len);
 
 // pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET)
 static const int jl_gc_sizeclasses[] = {
@@ -429,7 +426,7 @@ static const int jl_gc_sizeclasses[] = {
 #ifdef GC_SMALL_PAGE
 #ifdef _P64
 #  define JL_GC_N_POOLS 39
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
 #  define JL_GC_N_POOLS 40
 #else
 #  define JL_GC_N_POOLS 41
@@ -437,7 +434,7 @@ static const int jl_gc_sizeclasses[] = {
 #else
 #ifdef _P64
 #  define JL_GC_N_POOLS 49
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
 #  define JL_GC_N_POOLS 50
 #else
 #  define JL_GC_N_POOLS 51
@@ -452,7 +449,7 @@ STATIC_INLINE int jl_gc_alignment(size_t sz) JL_NOTSAFEPOINT
 #ifdef _P64
     (void)sz;
     return 16;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
     return sz <= 4 ? 8 : 16;
 #else
     // szclass 8
@@ -484,7 +481,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz) JL_NOTSAFEPOINT
     if (sz <= 8)
         return 0;
     const int N = 0;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
     if (sz <= 8)
         return (sz >= 4 ? 1 : 0);
     const int N = 1;
@@ -502,7 +499,7 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE
     if (sz >= 16 && sz <= 152) {
 #ifdef _P64
         const int N = 0;
-#elif MAX_ALIGN == 8
+#elif MAX_ALIGN > 4
         const int N = 1;
 #else
         const int N = 2;
@@ -518,51 +515,6 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz) JL_NOTSAFE
 #define GC_MAX_SZCLASS (2032-sizeof(void*))
 static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, "");
 
-#ifndef MMTK_GC
-
-// Size does NOT include the type tag!!
-STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
-{
-    jl_value_t *v;
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    if (sz <= GC_MAX_SZCLASS) {
-        int pool_id = jl_gc_szclass(allocsz);
-        jl_gc_pool_t *p = &ptls->gc_tls.heap.norm_pools[pool_id];
-        int osize = jl_gc_sizeclasses[pool_id];
-        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
-        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
-        v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
-    }
-    else {
-        if (allocsz < sz) // overflow in adding offs, size was "negative"
-            jl_throw(jl_memory_exception);
-        v = jl_gc_big_alloc_noinline(ptls, allocsz);
-    }
-    jl_set_typeof(v, ty);
-    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
-    return v;
-}
-
-#else  // MMTK_GC
-
-STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
-{
-    jl_value_t *v;
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    if (sz <= GC_MAX_SZCLASS) {
-        v = jl_mmtk_gc_alloc_default(ptls, allocsz, 16, ty);
-    }
-    else {
-        if (allocsz < sz) // overflow in adding offs, size was "negative"
-            jl_throw(jl_memory_exception);
-        v = jl_mmtk_gc_alloc_big(ptls, allocsz);
-    }
-    jl_set_typeof(v, ty);
-    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
-    return v;
-}
-#endif // MMTK_GC
-
 /* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a
  * gc frame, until it has been fully initialized. An uninitialized value in a
  * gc frame can crash upon encountering the first safepoint. By delaying use of
@@ -589,31 +541,9 @@ JL_DLLEXPORT uintptr_t jl_get_buff_tag(void) JL_NOTSAFEPOINT;
 typedef void jl_gc_tracked_buffer_t; // For the benefit of the static analyzer
 STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)
 {
-    jl_gc_tracked_buffer_t *buf = jl_gc_alloc(ptls, sz, (void*)jl_buff_tag);
-    // For array objects with an owner point (a->flags.how == 3), we would need to
-    // introspect the object to update the a->data field. To avoid doing that and
-    // making scan_object much more complex we simply enforce that both owner and
-    // buffers are always pinned
-    PTR_PIN(buf);
-    return buf;
+    return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag);
 }
 
-STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
-{
-    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
-                                                 sizeof(void*) * 2 : 16));
-    jl_taggedvalue_t *o = (jl_taggedvalue_t*)jl_gc_perm_alloc(allocsz, 0, align,
-                                                              sizeof(void*) % align);
-    // Possibly we do not need this for MMTk. We could declare a post_alloc func and define it differently in two GCs.
-    uintptr_t tag = (uintptr_t)ty;
-    o->header = tag | GC_OLD_MARKED;
-#ifdef MMTK_GC
-    jl_ptls_t ptls = jl_current_task->ptls;
-    mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(o), allocsz);
-#endif
-    return jl_valueof(o);
-}
 jl_value_t *jl_permbox8(jl_datatype_t *t, uintptr_t tag, uint8_t x);
 jl_value_t *jl_permbox32(jl_datatype_t *t, uintptr_t tag, uint32_t x);
 jl_svec_t *jl_perm_symsvec(size_t n, ...);
@@ -649,14 +579,6 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
 #endif
 #endif
 
-JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
-
-JL_DLLEXPORT void JL_NORETURN jl_throw_out_of_memory_error(void);
-
-
-JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
-void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT;
 void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT;
 size_t jl_genericmemory_nbytes(jl_genericmemory_t *a) JL_NOTSAFEPOINT;
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
@@ -665,10 +587,6 @@ void jl_gc_run_all_finalizers(jl_task_t *ct);
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
 void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;
 
-void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT;
-
-JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT;
-
 void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT;
 void jl_print_gc_stats(JL_STREAM *s);
@@ -724,8 +642,9 @@ JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
         jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
         size_t min_world, size_t max_world, jl_debuginfo_t *edges);
-jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
+JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
 JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache);
 
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner);
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
@@ -743,6 +662,7 @@ JL_DLLEXPORT const char *jl_debuginfo_name(jl_value_t *func) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_compile_method_instance(jl_method_instance_t *mi, jl_tupletype_t *types, size_t world);
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types);
+JL_DLLEXPORT int jl_add_entrypoint(jl_tupletype_t *types);
 jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
 jl_value_t *jl_code_or_ci_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT, size_t world);
 int jl_code_requires_compiler(jl_code_info_t *src, int include_force_compile);
@@ -765,7 +685,7 @@ int jl_valid_type_param(jl_value_t *v);
 
 JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t nargs);
 
-void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world);
+void JL_NORETURN jl_method_error(jl_value_t *F, jl_value_t **args, size_t na, size_t world);
 JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...);
 
 JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t);
@@ -830,6 +750,7 @@ jl_unionall_t *jl_rename_unionall(jl_unionall_t *u);
 JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
 JL_DLLEXPORT jl_value_t *jl_rewrap_unionall_(jl_value_t *t, jl_value_t *u);
+jl_value_t* jl_substitute_datatype(jl_value_t *t, jl_datatype_t * x, jl_datatype_t * y);
 int jl_count_union_components(jl_value_t *v);
 JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT;
 int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT;
@@ -866,7 +787,7 @@ JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
 int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT;
 
 JL_DLLEXPORT void jl_eval_const_decl(jl_module_t *m, jl_value_t *arg, jl_value_t *val);
-void jl_binding_set_type(jl_binding_t *b, jl_value_t *ty, int error);
+void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_value_t *ty);
 void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type);
 JL_DLLEXPORT void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type);
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno);
@@ -912,6 +833,7 @@ extern htable_t jl_current_modules JL_GLOBALLY_ROOTED;
 extern JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTED;
 extern jl_genericmemory_t *jl_global_roots_list JL_GLOBALLY_ROOTED;
 extern jl_genericmemory_t *jl_global_roots_keyset JL_GLOBALLY_ROOTED;
+extern arraylist_t *jl_entrypoint_mis;
 JL_DLLEXPORT int jl_is_globally_rooted(jl_value_t *val JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_as_global_root(jl_value_t *val, int insert) JL_GLOBALLY_ROOTED;
 
@@ -921,12 +843,117 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name
     int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred);
 JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source);
 
+EXTERN_INLINE_DECLARE enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT
+{
+#ifdef _P64
+    uint8_t bits = (pku & 0x7);
+    jl_value_t *val = (jl_value_t*)(pku & ~0x7);
+
+    if (val == NULL && bits == BINDING_KIND_IMPLICIT) {
+        return BINDING_KIND_GUARD;
+    }
+
+    return (enum jl_partition_kind)bits;
+#else
+    return (enum jl_partition_kind)pku.kind;
+#endif
+}
+
+STATIC_INLINE jl_value_t *decode_restriction_value(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT
+{
+#ifdef _P64
+    jl_value_t *val = (jl_value_t*)(pku & ~0x7);
+    // This is a little bit of a lie at the moment - it is one of the things that
+    // can go wrong with binding replacement.
+    JL_GC_PROMISE_ROOTED(val);
+    return val;
+#else
+    return pku.val;
+#endif
+}
+
+STATIC_INLINE jl_ptr_kind_union_t encode_restriction(jl_value_t *val, enum jl_partition_kind kind) JL_NOTSAFEPOINT
+{
+#ifdef _P64
+    if (kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED || kind == BINDING_KIND_FAILED)
+        assert(val == NULL);
+    if (kind == BINDING_KIND_GUARD)
+        kind = BINDING_KIND_IMPLICIT;
+    assert((((uintptr_t)val) & 0x7) == 0);
+    return ((jl_ptr_kind_union_t)val) | kind;
+#else
+    jl_ptr_kind_union_t ret = { val, kind };
+    return ret;
+#endif
+}
+
+STATIC_INLINE int jl_bkind_is_some_import(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == BINDING_KIND_IMPLICIT || kind == BINDING_KIND_EXPLICIT || kind == BINDING_KIND_IMPORTED;
+}
+
+STATIC_INLINE int jl_bkind_is_some_constant(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == BINDING_KIND_CONST || kind == BINDING_KIND_CONST_IMPORT;
+}
+
+STATIC_INLINE int jl_bkind_is_some_guard(enum jl_partition_kind kind) JL_NOTSAFEPOINT {
+    return kind == BINDING_KIND_FAILED || kind == BINDING_KIND_GUARD || kind == BINDING_KIND_DECLARED;
+}
+
+EXTERN_INLINE_DECLARE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT {
+    if (!b)
+        return NULL;
+    assert(jl_is_binding(b));
+    return jl_atomic_load_relaxed(&b->partitions);
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world);
+
+EXTERN_INLINE_DECLARE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT {
+    return decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction));
+}
+
+STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world) JL_NOTSAFEPOINT;
+
+#ifndef __clang_analyzer__
+STATIC_INLINE jl_ptr_kind_union_t jl_walk_binding_inplace(jl_binding_t **bnd, jl_binding_partition_t **bpart, size_t world) JL_NOTSAFEPOINT
+{
+    while (1) {
+        if (!*bpart)
+            return encode_restriction(NULL, BINDING_KIND_GUARD);
+        jl_ptr_kind_union_t pku = jl_atomic_load_acquire(&(*bpart)->restriction);
+        if (!jl_bkind_is_some_import(decode_restriction_kind(pku)))
+            return pku;
+        *bnd = (jl_binding_t*)decode_restriction_value(pku);
+        *bpart = jl_get_binding_partition(*bnd, world);
+    }
+}
+#endif
+
+STATIC_INLINE int is10digit(char c) JL_NOTSAFEPOINT
+{
+    return (c >= '0' && c <= '9');
+}
+
 STATIC_INLINE int is_anonfn_typename(char *name)
 {
     if (name[0] != '#' || name[1] == '#')
         return 0;
     char *other = strrchr(name, '#');
-    return other > &name[1] && other[1] > '0' && other[1] <= '9';
+    return other > &name[1] && is10digit(other[1]);
+}
+
+// Returns true for typenames of anounymous functions that have been canonicalized (i.e.
+// we mangled the name of the outermost enclosing function in their name).
+STATIC_INLINE int is_canonicalized_anonfn_typename(char *name) JL_NOTSAFEPOINT
+{
+    char *delim = strchr(&name[1], '#');
+    if (delim == NULL)
+        return 0;
+    if (delim[1] != '#')
+        return 0;
+    if (!is10digit(delim[2]))
+        return 0;
+    return 1;
 }
 
 // Each tuple can exist in one of 4 Vararg states:
@@ -1008,10 +1035,7 @@ void jl_init_tasks(void) JL_GC_DISABLED;
 void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo) JL_NOTSAFEPOINT;
 jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi);
 void jl_init_serializer(void);
-void jl_gc_init(void);
 void jl_init_uv(void);
-void jl_init_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT;
-void jl_deinit_thread_heap(jl_ptls_t ptls) JL_NOTSAFEPOINT;
 void jl_init_int32_int64_cache(void);
 JL_DLLEXPORT void jl_init_options(void);
 
@@ -1021,9 +1045,9 @@ extern JL_DLLEXPORT ssize_t jl_tls_offset;
 extern JL_DLLEXPORT const int jl_tls_elf_support;
 void jl_init_threading(void);
 void jl_start_threads(void);
-extern uv_mutex_t safepoint_lock;
 
 // Whether the GC is running
+extern uv_mutex_t safepoint_lock;
 extern char *jl_safepoint_pages;
 STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr)
 {
@@ -1031,7 +1055,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr)
     return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 4;
 }
 extern _Atomic(uint32_t) jl_gc_running;
-extern JL_DLLEXPORT _Atomic(uint32_t) jl_gc_disable_counter;
+extern _Atomic(uint32_t) jl_gc_disable_counter;
 // All the functions are safe to be called from within a signal handler
 // provided that the thread will not be interrupted by another asynchronous
 // signal.
@@ -1070,9 +1094,7 @@ int jl_safepoint_consume_sigint(void);
 void jl_wake_libuv(void) JL_NOTSAFEPOINT;
 
 void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT;
-#if defined(_OS_DARWIN_)
-typedef pthread_key_t jl_pgcstack_key_t;
-#elif defined(_OS_WINDOWS_)
+#if defined(_OS_WINDOWS_)
 typedef DWORD jl_pgcstack_key_t;
 #else
 typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT;
@@ -1127,6 +1149,12 @@ JL_DLLEXPORT jl_code_instance_t *jl_cache_uninferred(jl_method_instance_t *mi, j
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_for_uninferred(jl_method_instance_t *mi, jl_code_info_t *src);
 JL_DLLEXPORT extern jl_value_t *(*const jl_rettype_inferred_addr)(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t min_world, size_t max_world) JL_NOTSAFEPOINT;
 
+JL_DLLEXPORT void jl_force_trace_compile_timing_enable(void);
+JL_DLLEXPORT void jl_force_trace_compile_timing_disable(void);
+
+JL_DLLEXPORT void jl_force_trace_dispatch_enable(void);
+JL_DLLEXPORT void jl_force_trace_dispatch_disable(void);
+
 uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT;
 jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs);
 
@@ -1360,28 +1388,47 @@ STATIC_INLINE size_t jl_excstack_next(jl_excstack_t *stack, size_t itr) JL_NOTSA
     return itr-2 - jl_excstack_bt_size(stack, itr);
 }
 // Exception stack manipulation
-void jl_push_excstack(jl_task_t* task, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
+void jl_push_excstack(jl_task_t *ct, jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_ARGUMENT,
                       jl_value_t *exception JL_ROOTED_ARGUMENT,
                       jl_bt_element_t *bt_data, size_t bt_size);
 
+// System util to get maximum RSS
+JL_DLLEXPORT size_t jl_maxrss(void);
+
 //--------------------------------------------------
 // congruential random number generator
 // for a small amount of thread-local randomness
 
-STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT
+//TODO: utilize https://github.com/openssl/openssl/blob/master/crypto/rand/rand_uniform.c#L13-L99
+// for better performance, it does however require making users expect a 32bit random number.
+
+STATIC_INLINE uint64_t cong(uint64_t max, uint64_t *seed) JL_NOTSAFEPOINT // Open interval [0, max)
 {
-    if (max == 0)
+    if (max < 2)
         return 0;
     uint64_t mask = ~(uint64_t)0;
-    --max;
-    mask >>= __builtin_clzll(max|1);
-    uint64_t x;
+    int zeros = __builtin_clzll(max);
+    int bits = CHAR_BIT * sizeof(uint64_t) - zeros;
+    mask = mask >> zeros;
     do {
-        *seed = 69069 * (*seed) + 362437;
-        x = *seed & mask;
-    } while (x > max);
-    return x;
+        uint64_t value = 69069 * (*seed) + 362437;
+        *seed = value;
+        uint64_t x = value & mask;
+        if (x < max) {
+            return x;
+        }
+        int bits_left = zeros;
+        while (bits_left >= bits) {
+            value >>= bits;
+            x = value & mask;
+            if (x < max) {
+                return x;
+            }
+            bits_left -= bits;
+        }
+    } while (1);
 }
+
 JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_init_rand(void);
@@ -1759,6 +1806,7 @@ void jl_write_malloc_log(void);
 #  define jl_unreachable() ((void)jl_assume(0))
 #endif
 
+extern uv_mutex_t symtab_lock;
 jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 
 // Tools for locally disabling spurious compiler warnings
@@ -1841,7 +1889,7 @@ JL_DLLIMPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncod
         int32_t *func_idx, int32_t *specfunc_idx);
 JL_DLLIMPORT void jl_register_fptrs(uint64_t image_base, const struct _jl_image_fptrs_t *fptrs,
                                     jl_method_instance_t **linfos, size_t n);
-
+JL_DLLIMPORT void jl_get_llvm_mis(void *native_code, arraylist_t* MIs);
 JL_DLLIMPORT void jl_init_codegen(void);
 JL_DLLIMPORT void jl_teardown_codegen(void) JL_NOTSAFEPOINT;
 JL_DLLIMPORT int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
diff --git a/src/julia_threads.h b/src/julia_threads.h
index acb47766a99a2..3b804823d796b 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -4,11 +4,12 @@
 #ifndef JL_THREADS_H
 #define JL_THREADS_H
 
-#ifdef MMTK_GC
-#include "mmtkMutator.h"
-#endif
-
+#ifndef MMTK_GC
 #include "gc-tls.h"
+#else
+#include "gc-tls-mmtk.h"
+#endif
+#include "gc-tls-common.h"
 #include "julia_atomics.h"
 #ifndef _OS_WINDOWS_
 #include "pthread.h"
@@ -22,6 +23,8 @@ extern "C" {
 
 JL_DLLEXPORT int16_t jl_threadid(void);
 JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT;
+JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT;
 
 // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user
 
@@ -90,9 +93,13 @@ typedef ucontext_t _jl_ucontext_t;
 
 typedef struct {
     union {
-        _jl_ucontext_t ctx;
-        jl_stack_context_t copy_ctx;
+        _jl_ucontext_t *ctx;
+        jl_stack_context_t *copy_ctx;
     };
+    void *stkbuf; // malloc'd memory (either copybuf or stack)
+    size_t bufsz; // actual sizeof stkbuf
+    unsigned int copy_stack:31; // sizeof stack for copybuf
+    unsigned int started:1;
 #if defined(_COMPILER_TSAN_ENABLED_)
     void *tsan_state;
 #endif
@@ -153,19 +160,16 @@ typedef struct _jl_tls_states_t {
     // Counter to disable finalizer **on the current thread**
     int finalizers_inhibited;
     jl_gc_tls_states_t gc_tls; // this is very large, and the offset of the first member is baked into codegen
+    jl_gc_tls_states_common_t gc_tls_common; // common tls for both GCs
     volatile sig_atomic_t defer_signal;
     _Atomic(struct _jl_task_t*) current_task;
     struct _jl_task_t *next_task;
     struct _jl_task_t *previous_task;
     struct _jl_task_t *root_task;
     struct _jl_timing_block_t *timing_stack;
+    // This is the location of our copy_stack
     void *stackbase;
     size_t stacksize;
-    union {
-        _jl_ucontext_t base_ctx; // base context of stack
-        // This hack is needed to support always_copy_stacks:
-        jl_stack_context_t copy_stack_ctx;
-    };
     // Temp storage for exception thrown in signal handler. Not rooted.
     struct _jl_value_t *sig_exception;
     // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
@@ -191,6 +195,9 @@ typedef struct _jl_tls_states_t {
     // Saved exception for previous *external* API call or NULL if cleared.
     // Access via jl_exception_occurred().
     struct _jl_value_t *previous_exception;
+#ifdef _OS_DARWIN_
+    jl_jmp_buf *volatile safe_restore;
+#endif
 
     // currently-held locks, to be released when an exception is thrown
     small_arraylist_t locks;
@@ -203,11 +210,6 @@ typedef struct _jl_tls_states_t {
         uint64_t sleep_leave;
     )
 
-#ifdef MMTK_GC
-    MMTkMutatorContext mmtk_mutator;
-    size_t malloc_sz_since_last_poll;
-#endif
-
     // some hidden state (usually just because we don't have the type's size declaration)
 #ifdef JL_LIBRARY_EXPORTS
     uv_mutex_t sleep_lock;
@@ -215,10 +217,7 @@ typedef struct _jl_tls_states_t {
 #endif
 } jl_tls_states_t;
 
-#ifndef JL_LIBRARY_EXPORTS
-// deprecated (only for external consumers)
 JL_DLLEXPORT void *jl_get_ptls_states(void);
-#endif
 
 // Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
 #ifdef __MIC__
diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
index 953ecc1830142..9d2fba832839c 100644
--- a/src/llvm-alloc-helpers.cpp
+++ b/src/llvm-alloc-helpers.cpp
@@ -88,6 +88,8 @@ bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
     memop.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
     memop.isobjref = hasObjref(elty);
     auto &field = getField(offset, size, elty);
+    field.second.hasunboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
+
     if (field.second.hasobjref != memop.isobjref)
         field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits
     if (!isstore)
@@ -198,6 +200,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
                 auto elty = inst->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             } else if (!required.use_info.addMemOp(inst, 0, cur.offset,
                                                                inst->getType(),
@@ -289,6 +292,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
                 auto elty = storev->getType();
                 required.use_info.has_unknown_objref |= hasObjref(elty);
                 required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             } else if (!required.use_info.addMemOp(inst, use->getOperandNo(),
                                                                cur.offset, storev->getType(),
@@ -310,10 +314,14 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
             }
             required.use_info.hasload = true;
             auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            Type *elty = storev->getType();
             if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
-                                                               cur.offset, storev->getType(),
+                                                               cur.offset, elty,
                                                                true, required.DL)) {
                 LLVM_DEBUG(dbgs() << "Atomic inst has unknown offset\n");
+                required.use_info.has_unknown_objref |= hasObjref(elty);
+                required.use_info.has_unknown_objrefaggr |= hasObjref(elty) && !isa<PointerType>(elty);
+                required.use_info.has_unknown_unboxed |= !hasObjref(elty) || (hasObjref(elty) && !isa<PointerType>(elty));
                 required.use_info.hasunknownmem = true;
             }
             required.use_info.refload = true;
diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h
index 49c3b15332a56..20e9132d10b4c 100644
--- a/src/llvm-alloc-helpers.h
+++ b/src/llvm-alloc-helpers.h
@@ -46,6 +46,8 @@ namespace jl_alloc {
         bool hasaggr:1;
         bool multiloc:1;
         bool hasload:1;
+        // The alloc has a unboxed object at this offset.
+        bool hasunboxed:1;
         llvm::Type *elty;
         llvm::SmallVector<MemOp,4> accesses;
         Field(uint32_t size, llvm::Type *elty)
@@ -54,6 +56,7 @@ namespace jl_alloc {
               hasaggr(false),
               multiloc(false),
               hasload(false),
+              hasunboxed(false),
               elty(elty)
         {
         }
@@ -95,6 +98,9 @@ namespace jl_alloc {
         // The alloc has an aggregate Julia object reference not in an explicit field.
         bool has_unknown_objrefaggr:1;
 
+        // The alloc has an unboxed object at an unknown offset.
+        bool has_unknown_unboxed:1;
+
         void reset()
         {
             escaped = false;
@@ -110,6 +116,7 @@ namespace jl_alloc {
             allockind = llvm::AllocFnKind::Unknown;
             has_unknown_objref = false;
             has_unknown_objrefaggr = false;
+            has_unknown_unboxed = false;
             uses.clear();
             preserves.clear();
             memops.clear();
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index e0cde7206b6b9..188955fd50972 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -252,10 +252,12 @@ void Optimizer::optimizeAll()
             removeAlloc(orig);
             continue;
         }
+        bool has_unboxed = use_info.has_unknown_unboxed;
         bool has_ref = use_info.has_unknown_objref;
         bool has_refaggr = use_info.has_unknown_objrefaggr;
         for (auto memop: use_info.memops) {
             auto &field = memop.second;
+            has_unboxed |= field.hasunboxed;
             if (field.hasobjref) {
                 has_ref = true;
                 // This can be relaxed a little based on hasload
@@ -284,6 +286,19 @@ void Optimizer::optimizeAll()
             splitOnStack(orig);
             continue;
         }
+        // The move to stack code below, if has_ref is set, changes the allocation to an array of jlvalue_t's. This is fine
+        // if all objects are jlvalue_t's. However, if part of the allocation is an unboxed value (e.g. it is a { float, jlvaluet }),
+        // then moveToStack will create a [2 x jlvaluet] bitcast to { float, jlvaluet }.
+        // This later causes the GC rooting pass, to miss-characterize the float as a pointer to a GC value
+        if (has_unboxed && has_ref) {
+            REMARK([&]() {
+                return OptimizationRemarkMissed(DEBUG_TYPE, "Escaped", orig)
+                    << "GC allocation could not be split since it contains both boxed and unboxed values, unable to move to stack " << ore::NV("GC Allocation", orig);
+            });
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
+        }
         REMARK([&](){
             return OptimizationRemark(DEBUG_TYPE, "Stack Move Allocation", orig)
                 << "GC allocation moved to stack " << ore::NV("GC Allocation", orig);
@@ -755,26 +770,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
             user->replaceUsesOfWith(orig_i, replace);
         }
         else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
-            #if JL_LLVM_VERSION >= 170000
-            #ifndef JL_NDEBUG
-            auto cast_t = PointerType::get(user->getType(), new_i->getType()->getPointerAddressSpace());
-            Type *new_t = new_i->getType();
-            assert(cast_t == new_t);
-            #endif
-            auto replace_i = new_i;
-            #else
-            auto cast_t = PointerType::getWithSamePointeeType(cast<PointerType>(user->getType()), new_i->getType()->getPointerAddressSpace());
-            auto replace_i = new_i;
-            Type *new_t = new_i->getType();
-            if (cast_t != new_t) {
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(cast_t->getContext().supportsTypedPointers());
-                replace_i = new BitCastInst(replace_i, cast_t, "", user);
-                replace_i->setDebugLoc(user->getDebugLoc());
-                replace_i->takeName(user);
-            }
-            #endif
-            push_frame(user, replace_i);
+            push_frame(user, new_i);
         }
         else if (auto gep = dyn_cast<GetElementPtrInst>(user)) {
             SmallVector<Value *, 4> IdxOperands(gep->idx_begin(), gep->idx_end());
diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h
index 242dab021f101..a99e18f3e3762 100644
--- a/src/llvm-codegen-shared.h
+++ b/src/llvm-codegen-shared.h
@@ -125,7 +125,6 @@ struct CountTrackedPointers {
     CountTrackedPointers(llvm::Type *T, bool ignore_loaded=false);
 };
 
-unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder);
 llvm::SmallVector<llvm::Value*, 0> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
 
 static inline void llvm_dump(llvm::Value *v)
@@ -187,45 +186,39 @@ static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instructi
 }
 
 // Get PTLS through current task.
-static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *pgcstack)
+static inline llvm::Value *get_current_task_from_pgcstack(llvm::IRBuilder<> &builder, llvm::Value *pgcstack)
 {
     using namespace llvm;
-    auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
+    auto i8 = builder.getInt8Ty();
     const int pgcstack_offset = offsetof(jl_task_t, gcstack);
-    return builder.CreateInBoundsGEP(
-            T_pjlvalue, pgcstack,
-            ConstantInt::get(T_size, -(pgcstack_offset / sizeof(void *))),
-            "current_task");
+    return builder.CreateConstInBoundsGEP1_32(i8, pgcstack, -pgcstack_offset, "current_task");
 }
 
 // Get PTLS through current task.
-static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *current_task, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa)
 {
     using namespace llvm;
-    auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
+    auto i8 = builder.getInt8Ty();
+    auto T_ptr = builder.getPtrTy();
     const int ptls_offset = offsetof(jl_task_t, ptls);
-    llvm::Value *pptls = builder.CreateInBoundsGEP(
-            T_pjlvalue, current_task,
-            ConstantInt::get(T_size, ptls_offset / sizeof(void *)),
-            "ptls_field");
-    LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue,
-            pptls, Align(sizeof(void *)), "ptls_load");
+    llvm::Value *pptls = builder.CreateConstInBoundsGEP1_32(i8, current_task, ptls_offset, "ptls_field");
+    LoadInst *ptls_load = builder.CreateAlignedLoad(T_ptr, pptls, Align(sizeof(void *)), "ptls_load");
     // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
     tbaa_decorate(tbaa, ptls_load);
     return ptls_load;
 }
 
 // Get signal page through current task.
-static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa)
+static inline llvm::Value *get_current_signal_page_from_ptls(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa)
 {
     using namespace llvm;
     // return builder.CreateCall(prepare_call(reuse_signal_page_func));
-    auto T_psize = T_size->getPointerTo();
-    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
-    llvm::Value *psafepoint = builder.CreateInBoundsGEP(
-            T_psize, ptls, ConstantInt::get(T_size, nthfield));
+    auto T_ptr = builder.getPtrTy();
+    auto i8 = builder.getInt8Ty();
+    int nthfield = offsetof(jl_tls_states_t, safepoint);
+    llvm::Value *psafepoint = builder.CreateConstInBoundsGEP1_32(i8, ptls, nthfield);
     LoadInst *ptls_load = builder.CreateAlignedLoad(
-            T_psize, psafepoint, Align(sizeof(void *)), "safepoint");
+            T_ptr, psafepoint, Align(sizeof(void *)), "safepoint");
     tbaa_decorate(tbaa, ptls_load);
     return ptls_load;
 }
@@ -239,7 +232,7 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder)
 static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false)
 {
     using namespace llvm;
-    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, T_size, ptls, tbaa);
+    llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa);
     emit_signal_fence(builder);
     Module *M = builder.GetInsertBlock()->getModule();
     LLVMContext &C = builder.getContext();
@@ -250,8 +243,7 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s
     else {
         Function *F = M->getFunction("julia.safepoint");
         if (!F) {
-            auto T_psize = T_size->getPointerTo();
-            FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false);
+            FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_size->getPointerTo()}, false);
             F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M);
 #if JL_LLVM_VERSION >= 160000
             F->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly());
@@ -268,8 +260,8 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T
 {
     using namespace llvm;
     Type *T_int8 = state->getType();
-    Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state));
-    Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef<Value*>(offset), "gc_state");
+    unsigned offset = offsetof(jl_tls_states_t, gc_state);
+    Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state");
     if (old_state == nullptr) {
         old_state = builder.CreateLoad(T_int8, gc_state);
         cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
index 2539c5cd2e37c..05d62adc57926 100644
--- a/src/llvm-cpufeatures.cpp
+++ b/src/llvm-cpufeatures.cpp
@@ -94,7 +94,7 @@ bool lowerCPUFeatures(Module &M) JL_NOTSAFEPOINT
     for (auto &F: M.functions()) {
         auto FN = F.getName();
 
-        if (FN.startswith("julia.cpu.have_fma.")) {
+        if (FN.starts_with("julia.cpu.have_fma.")) {
             for (Use &U: F.uses()) {
                 User *RU = U.getUser();
                 CallInst *I = cast<CallInst>(RU);
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
index 5d0d9f5d37c40..7f1b076897fc8 100644
--- a/src/llvm-demote-float16.cpp
+++ b/src/llvm-demote-float16.cpp
@@ -49,37 +49,28 @@ extern JuliaOJIT *jl_ExecutionEngine;
 
 namespace {
 
-static bool have_fp16(Function &caller, const Triple &TT) {
-    Attribute FSAttr = caller.getFnAttribute("target-features");
-    StringRef FS = "";
-    if (FSAttr.isValid())
-        FS = FSAttr.getValueAsString();
-    else if (jl_ExecutionEngine)
-        FS = jl_ExecutionEngine->getTargetFeatureString();
-    // else probably called from opt, just do nothing
-    if (TT.isAArch64()) {
-        if (FS.find("+fp16fml") != llvm::StringRef::npos || FS.find("+fullfp16") != llvm::StringRef::npos){
-            return true;
-        }
-    } else if (TT.getArch() == Triple::x86_64) {
-        if (FS.find("+avx512fp16") != llvm::StringRef::npos){
-            return true;
-        }
-    }
-    if (caller.hasFnAttribute("julia.hasfp16")) {
-        return true;
-    }
-    return false;
+static bool have_fp16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasfp16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
+
+    // llvm/llvm-project#97975: on some platforms, `half` uses excessive precision
+    if (TT.isPPC())
+        return false;
+
+    return true;
 }
 
-static bool have_bf16(Function &caller, const Triple &TT) {
-    if (caller.hasFnAttribute("julia.hasbf16")) {
-        return true;
-    }
+static bool have_bf16(Function &F, const Triple &TT) {
+    // for testing purposes
+    Attribute Attr = F.getFnAttribute("julia.hasbf16");
+    if (Attr.isValid())
+        return Attr.getValueAsBool();
 
-    // there's no targets that fully support bfloat yet;,
-    // AVX512BF16 only provides conversion and dot product instructions.
-    return false;
+    // https://github.com/llvm/llvm-project/issues/97975#issuecomment-2218770199:
+    // on current versions of LLVM, bf16 always uses TypeSoftPromoteHalf
+    return true;
 }
 
 static bool demoteFloat16(Function &F)
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index dd4dd05a89101..b06a084651231 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -1,22 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "llvm-version.h"
-#include "passes.h"
-
-#include <llvm/ADT/Statistic.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#include "llvm-codegen-shared.h"
-#include "julia.h"
-#include "julia_internal.h"
-#include "llvm-pass-helpers.h"
+#include "llvm-gc-interface-passes.h"
 
 #define DEBUG_TYPE "final_gc_lowering"
 STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics");
@@ -27,63 +11,6 @@ STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
 STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics");
 STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics");
 
-using namespace llvm;
-
-// The final GC lowering pass. This pass lowers platform-agnostic GC
-// intrinsics to platform-dependent instruction sequences. The
-// intrinsics it targets are those produced by the late GC frame
-// lowering pass.
-//
-// This pass targets typical back-ends for which the standard Julia
-// runtime library is available. Atypical back-ends should supply
-// their own lowering pass.
-
-struct FinalLowerGC: private JuliaPassContext {
-    bool runOnFunction(Function &F);
-
-private:
-    Function *queueRootFunc;
-    Function *poolAllocFunc;
-    Function *bigAllocFunc;
-    Function *allocTypedFunc;
-#ifdef MMTK_GC
-    Function *writeBarrier1Func;
-    Function *writeBarrier2Func;
-    Function *writeBarrier1SlowFunc;
-    Function *writeBarrier2SlowFunc;
-#endif
-    Instruction *pgcstack;
-    Type *T_size;
-
-    // Lowers a `julia.new_gc_frame` intrinsic.
-    void lowerNewGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.push_gc_frame` intrinsic.
-    void lowerPushGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.pop_gc_frame` intrinsic.
-    void lowerPopGCFrame(CallInst *target, Function &F);
-
-    // Lowers a `julia.get_gc_frame_slot` intrinsic.
-    void lowerGetGCFrameSlot(CallInst *target, Function &F);
-
-    // Lowers a `julia.gc_alloc_bytes` intrinsic.
-    void lowerGCAllocBytes(CallInst *target, Function &F);
-
-    // Lowers a `julia.queue_gc_root` intrinsic.
-    void lowerQueueGCRoot(CallInst *target, Function &F);
-
-    // Lowers a `julia.safepoint` intrinsic.
-    void lowerSafepoint(CallInst *target, Function &F);
-
-#ifdef MMTK_GC
-    void lowerWriteBarrier1(CallInst *target, Function &F);
-    void lowerWriteBarrier2(CallInst *target, Function &F);
-    void lowerWriteBarrier1Slow(CallInst *target, Function &F);
-    void lowerWriteBarrier2Slow(CallInst *target, Function &F);
-#endif
-};
-
 void FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
 {
     ++NewGCFrameCount;
@@ -241,7 +168,7 @@ void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
         else {
             auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
             auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
-            newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize, type });
+            newI = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize, type });
             if (sz > 0)
                 derefBytes = sz;
         }
@@ -277,7 +204,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
     }
     LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
     queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
-    poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
+    smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
     bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
     allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
 #ifdef MMTK_GC
@@ -307,9 +234,9 @@ bool FinalLowerGC::runOnFunction(Function &F)
             } while (0)
 
             LOWER_INTRINSIC(newGCFrame, lowerNewGCFrame);
+            LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
             LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
             LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame);
-            LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
             LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes);
             LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
             LOWER_INTRINSIC(safepoint, lowerSafepoint);
diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h
new file mode 100644
index 0000000000000..7ddfc1f1c10ef
--- /dev/null
+++ b/src/llvm-gc-interface-passes.h
@@ -0,0 +1,430 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/*
+  LLVM passes that may be partially modified by a third-party GC implementation.
+*/
+
+#include "llvm-version.h"
+#include "passes.h"
+
+#include "llvm/IR/DerivedTypes.h"
+#include <llvm-c/Core.h>
+#include <llvm-c/Types.h>
+
+#include <llvm/ADT/Statistic.h>
+#include <llvm/ADT/BitVector.h>
+#include <llvm/ADT/SparseBitVector.h>
+#include <llvm/ADT/PostOrderIterator.h>
+#include <llvm/ADT/SetVector.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/ADT/SmallSet.h>
+#include <llvm/Analysis/CFG.h>
+#include <llvm/Analysis/DomTreeUpdater.h>
+#include <llvm/Analysis/InstSimplifyFolder.h>
+#include <llvm/IR/Value.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Dominators.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/MDBuilder.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/ModuleSlotTracker.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Pass.h>
+#include <llvm/Support/Debug.h>
+#include <llvm/Transforms/Utils/BasicBlockUtils.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
+
+#include <llvm/InitializePasses.h>
+
+#include "llvm-codegen-shared.h"
+#include "julia.h"
+#include "julia_internal.h"
+#include "julia_assert.h"
+#include "llvm-pass-helpers.h"
+#include <map>
+#include <string>
+
+#ifndef LLVM_GC_PASSES_H
+#define LLVM_GC_PASSES_H
+
+using namespace llvm;
+
+/* Julia GC Root Placement pass. For a general overview of the design of GC
+   root lowering, see the devdocs. This file is the actual implementation.
+
+   The actual algorithm is fairly straightforward. First recall the goal of this
+   pass:
+
+   Minimize the number of needed gc roots/stores to them subject to the constraint
+   that at every safepoint, any live gc-tracked pointer (i.e. for which there is
+   a path after this point that contains a use of this pointer) is in some gc slot.
+
+   In particular, in order to understand this algorithm, it is important to
+   realize that the only places where rootedness matters is at safepoints.
+
+   Now, the primary phases of the algorithm are:
+
+   1. Local Scan
+
+      During this step, each Basic Block is inspected and analyzed for local
+      properties. In particular, we want to determine the ordering of any of
+      the following activities:
+
+        - Any Def of a gc-tracked pointer. In general Defs are the results of
+          calls or loads from appropriate memory locations. Phi nodes and
+          selects do complicate this story slightly as described below.
+        - Any use of a gc-tracked or derived pointer. As described in the
+          devdocs, a use is in general one of
+              a) a load from a tracked/derived value
+              b) a store to a tracked/derived value
+              c) a store OF a tracked/derived value
+              d) a use of a value as a call operand (including operand bundles)
+        - Any safepoint
+
+      Crucially, we also perform pointer numbering during the local scan,
+      assigning every Def a unique integer and caching the integer for each
+      derived pointer. This allows us to operate only on the set of Defs (
+      represented by these integers) for the rest of the algorithm. We also
+      maintain some local utility information that is needed by later passes
+      (see the BBState struct for details).
+
+    2. Dataflow Computation
+
+      This computation operates entirely over the function's control flow graph
+      and does not look into a basic block. The algorithm is essentially
+      textbook iterative data flow for liveness computation. However, the
+      data flow equations are slightly more complicated because we also
+      forward propagate rootedness information in addition to backpropagating
+      liveness.
+
+    3. Live Set Computation
+
+      With the liveness information from the previous step, we can now compute,
+      for every safepoint, the set of values live at that particular safepoint.
+      There are three pieces of information being combined here:
+           i. Values that needed to be live due to local analysis (e.g. there
+              was a def, then a safepoint, then a use). This was computed during
+              local analysis.
+          ii. Values that are live across the basic block (i.e. they are live
+              at every safepoint within the basic block). This relies entirely
+              on the liveness information.
+         iii. Values that are now live-out from the basic block (i.e. they are
+              live at every safepoint following their def). During local
+              analysis, we keep, for every safepoint, those values that would
+              be live if they were live out. Here we can check if they are
+              actually live-out and make the appropriate additions to the live
+              set.
+
+       Lastly, we also explicitly compute, for each value, the list of values
+       that are simultaneously live at some safepoint. This is known as an
+       "interference graph" and is the input to the next step.
+
+    4. GC Root coloring
+
+      Two values which are not simultaneously live at a safepoint can share the
+      same slot. This is an important optimization, because otherwise long
+      functions would have exceptionally large GC slots, reducing performance
+      and bloating the size of the stack. Assigning values to these slots is
+      equivalent to doing graph coloring on the interference graph - the graph
+      where nodes are values and two values have an edge if they are
+      simultaneously live at a safepoint - which we computed in the previous
+      step. Now graph coloring in general is a hard problem. However, for SSA
+      form programs, (and most programs in general, by virtue of their
+      structure), the resulting interference graphs are chordal and can be
+      colored optimally in linear time by performing greedy coloring in a
+      perfect elimination order. Now, our interference graphs are likely not
+      entirely chordal due to some non-SSA corner cases. However, using the same
+      algorithm should still give a very good coloring while having sufficiently
+      low runtime.
+
+    5. JLCall frame optimizations
+
+      Unlike earlier iterations of the gc root placement logic, jlcall frames
+      are no longer treated as a special case and need not necessarily be sunk
+      into the gc frame. Additionally, we now emit lifetime
+      intrinsics, so regular stack slot coloring will merge any jlcall frames
+      not sunk into the gc frame. Nevertheless performing such sinking can still
+      be profitable. Since all arguments to a jlcall are guaranteed to be live
+      at that call in some gc slot, we can attempt to rearrange the slots within
+      the gc-frame, or reuse slots not assigned at that particular location
+      for the gcframe. However, even without this optimization, stack frames
+      are at most two times larger than optimal (because regular stack coloring
+      can merge the jlcall allocas).
+
+      N.B.: This step is not yet implemented.
+
+    6. Root placement
+
+      This performs the actual insertion of the GCFrame pushes/pops, zeros out
+      the gc frame and creates the stores to the gc frame according to the
+      stack slot assignment computed in the previous step. GC frames stores
+      are generally sunk right before the first safe point that use them
+      (this is beneficial for code where the primary path does not have
+      safepoints, but some other path - e.g. the error path does). However,
+      if the first safepoint is not dominated by the definition (this can
+      happen due to the non-ssa corner cases), the store is inserted right after
+      the definition.
+
+    7. Cleanup
+
+      This step performs necessary cleanup before passing the IR to codegen. In
+      particular, it removes any calls to julia_from_objref intrinsics and
+      removes the extra operand bundles from ccalls. In the future it could
+      also strip the addrspace information from all values as this
+      information is no longer needed.
+
+
+  There are a couple important special cases that deserve special attention:
+
+    A. PHIs and Selects
+
+      In general PHIs and selects are treated as separate defs for the purposes
+      of the algorithm and their operands as uses of those values. It is
+      important to consider however WHERE the uses of PHI's operands are
+      located. It is neither at the start of the basic block, because the values
+      do not dominate the block (so can't really consider them live-in), nor
+      at the end of the predecessor (because they are actually live out).
+      Instead it is best to think of those uses as living on the edge between
+      the appropriate predecessor and the block containing the PHI.
+
+      Another concern is PHIs of derived values. Since we cannot simply root
+      these values by storing them to a GC slot, we need to insert a new,
+      artificial PHI that tracks the base pointers for the derived values. E.g.
+      in:
+
+      A:
+        %Abase = load addrspace(10) *...
+        %Aderived = addrspacecast %Abase to addrspace(11)
+      B:
+        %Bbase = load addrspace(10) *...
+        %Bderived = addrspacecast %Bbase to addrspace(11)
+      C:
+        %phi = phi [%Aderived, %A
+                    %Bderived, %B]
+
+      we will insert another phi in C to track the relevant base pointers:
+
+        %philift = phi [%Abase, %A
+                        %Bbase, %B]
+
+      We then pretend, for the purposes of numbering that %phi was derived from
+      %philift. Note that in order to be able to do this, we need to be able to
+      perform this lifting either during numbering or instruction scanning.
+
+    B. Vectors of pointers/Union representations
+
+      Since this pass runs very late in the pass pipeline, it runs after the
+      various vectorization passes. As a result, we have to potentially deal
+      with vectors of gc-tracked pointers. For the purposes of most of the
+      algorithm, we simply assign every element of the vector a separate number
+      and no changes are needed. However, those parts of the algorithm that
+      look at IR need to be aware of the possibility of encountering vectors of
+      pointers.
+
+      Similarly, unions (e.g. in call returns) are represented as a struct of
+      a gc-tracked value and an argument selector. We simply assign a single
+      number to this struct and proceed as if it was a single pointer. However,
+      this again requires care at the IR level.
+
+    C. Non mem2reg'd allocas
+
+      Under some circumstances, allocas will still be present in the IR when
+      we get to this pass. We don't try very hard to handle this case, and
+      simply sink the alloca into the GCFrame.
+*/
+
+// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something
+// substantially smaller here doesn't actually save much memory because of malloc overhead.
+// Too large is bad also though - 4096 was found to be a reasonable middle ground.
+using LargeSparseBitVector = SparseBitVector<4096>;
+
+struct BBState {
+    // Uses in this BB
+    // These do not get updated after local analysis
+    LargeSparseBitVector Defs;
+    LargeSparseBitVector PhiOuts;
+    LargeSparseBitVector UpExposedUses;
+    // These get updated during dataflow
+    LargeSparseBitVector LiveIn;
+    LargeSparseBitVector LiveOut;
+    SmallVector<int, 0> Safepoints;
+    int TopmostSafepoint = -1;
+    bool HasSafepoint = false;
+    // Have we gone through this basic block in our local scan yet?
+    bool Done = false;
+};
+
+struct State {
+    Function *const F;
+    DominatorTree *DT;
+
+    // The maximum assigned value number
+    int MaxPtrNumber;
+    // The maximum assigned safepoint number
+    int MaxSafepointNumber;
+    // Cache of numbers assigned to IR values. This includes caching of numbers
+    // for derived values
+    std::map<Value *, int> AllPtrNumbering;
+    std::map<Value *, SmallVector<int, 0>> AllCompositeNumbering;
+    // The reverse of the previous maps
+    std::map<int, Value *> ReversePtrNumbering;
+    // Neighbors in the coloring interference graph. I.e. for each value, the
+    // indices of other values that are used simultaneously at some safe point.
+    SmallVector<LargeSparseBitVector, 0> Neighbors;
+    // The result of the local analysis
+    std::map<const BasicBlock *, BBState> BBStates;
+
+    // Refinement map. If all of the values are rooted
+    // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
+    // the key is already rooted (but not the other way around).
+    // A value that can be refined to -2 never need any rooting or write barrier.
+    // A value that can be refined to -1 don't need local root but still need write barrier.
+    // At the end of `LocalScan` this map has a few properties
+    // 1. Values are either < 0 or dominates the key
+    // 2. Therefore this is a DAG
+    std::map<int, SmallVector<int, 1>> Refinements;
+
+    // GC preserves map. All safepoints dominated by the map key, but not any
+    // of its uses need to preserve the values listed in the map value.
+    std::map<Instruction *, SmallVector<int, 0>> GCPreserves;
+
+    // The assignment of numbers to safepoints. The indices in the map
+    // are indices into the next three maps which store safepoint properties
+    std::map<Instruction *, int> SafepointNumbering;
+
+    // Reverse mapping index -> safepoint
+    SmallVector<Instruction *, 0> ReverseSafepointNumbering;
+
+    // Instructions that can return twice. For now, all values live at these
+    // instructions will get their own, dedicated GC frame slots, because they
+    // have unobservable control flow, so we can't be sure where they're
+    // actually live. All of these are also considered safepoints.
+    SmallVector<Instruction *, 0> ReturnsTwice;
+
+    // The set of values live at a particular safepoint
+    SmallVector< LargeSparseBitVector , 0> LiveSets;
+    // Those values that - if live out from our parent basic block - are live
+    // at this safepoint.
+    SmallVector<SmallVector<int, 0>> LiveIfLiveOut;
+    // The set of values that are kept alive by the callee.
+    SmallVector<SmallVector<int, 0>> CalleeRoots;
+    // We don't bother doing liveness on Allocas that were not mem2reg'ed.
+    // they just get directly sunk into the root array.
+    DenseMap<AllocaInst *, unsigned> ArrayAllocas;
+    DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
+    SmallVector<std::pair<StoreInst *, unsigned>, 0> TrackedStores;
+    State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
+};
+
+
+struct LateLowerGCFrame:  private JuliaPassContext {
+    function_ref<DominatorTree &()> GetDT;
+    LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
+
+public:
+    bool runOnFunction(Function &F, bool *CFGModified = nullptr);
+
+private:
+    CallInst *pgcstack;
+    Function *smallAllocFunc;
+
+    void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
+                      SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
+    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F);
+    void NoteUse(State &S, BBState &BBS, Value *V, Function &F) {
+        NoteUse(S, BBS, V, BBS.UpExposedUses, F);
+    }
+
+    void LiftPhi(State &S, PHINode *Phi);
+    void LiftSelect(State &S, SelectInst *SI);
+    Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
+    SmallVector<Value*, 0> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
+    Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
+
+    int Number(State &S, Value *V);
+    int NumberBase(State &S, Value *Base);
+    SmallVector<int, 0> NumberAll(State &S, Value *V);
+    SmallVector<int, 0> NumberAllBase(State &S, Value *Base);
+
+    void NoteOperandUses(State &S, BBState &BBS, Instruction &UI);
+    void MaybeTrackDst(State &S, MemTransferInst *MI);
+    void MaybeTrackStore(State &S, StoreInst *I);
+    State LocalScan(Function &F);
+    void ComputeLiveness(State &S);
+    void ComputeLiveSets(State &S);
+    SmallVector<int, 0> ColorRoots(const State &S);
+    void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
+    void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame);
+    void PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
+    void CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified);
+    bool CleanupIR(Function &F, State *S, bool *CFGModified);
+    void NoteUseChain(State &S, BBState &BBS, User *TheUser);
+    SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
+    void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
+    void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots);
+    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
+    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
+#ifdef MMTK_GC
+    Value* lowerGCAllocBytesLate(CallInst *target, Function &F);
+#endif
+};
+
+// The final GC lowering pass. This pass lowers platform-agnostic GC
+// intrinsics to platform-dependent instruction sequences. The
+// intrinsics it targets are those produced by the late GC frame
+// lowering pass.
+//
+// This pass targets typical back-ends for which the standard Julia
+// runtime library is available. Atypical back-ends should supply
+// their own lowering pass.
+
+struct FinalLowerGC: private JuliaPassContext {
+    bool runOnFunction(Function &F);
+
+private:
+    Function *queueRootFunc;
+    Function *smallAllocFunc;
+    Function *bigAllocFunc;
+    Function *allocTypedFunc;
+#ifdef MMTK_GC
+    Function *writeBarrier1Func;
+    Function *writeBarrier2Func;
+    Function *writeBarrier1SlowFunc;
+    Function *writeBarrier2SlowFunc;
+#endif
+    Instruction *pgcstack;
+    Type *T_size;
+
+    // Lowers a `julia.new_gc_frame` intrinsic.
+    void lowerNewGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.push_gc_frame` intrinsic.
+    void lowerPushGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.pop_gc_frame` intrinsic.
+    void lowerPopGCFrame(CallInst *target, Function &F);
+
+    // Lowers a `julia.get_gc_frame_slot` intrinsic.
+    void lowerGetGCFrameSlot(CallInst *target, Function &F);
+
+    // Lowers a `julia.gc_alloc_bytes` intrinsic.
+    void lowerGCAllocBytes(CallInst *target, Function &F);
+
+    // Lowers a `julia.queue_gc_root` intrinsic.
+    void lowerQueueGCRoot(CallInst *target, Function &F);
+
+    // Lowers a `julia.safepoint` intrinsic.
+    void lowerSafepoint(CallInst *target, Function &F);
+
+#ifdef MMTK_GC
+    void lowerWriteBarrier1(CallInst *target, Function &F);
+    void lowerWriteBarrier2(CallInst *target, Function &F);
+    void lowerWriteBarrier1Slow(CallInst *target, Function &F);
+    void lowerWriteBarrier2Slow(CallInst *target, Function &F);
+#endif
+};
+
+#endif // LLVM_GC_PASSES_H
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index 5923214a47652..3201ae64cf984 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1,372 +1,9 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include "llvm-version.h"
-#include "passes.h"
-
-#include "llvm/IR/DerivedTypes.h"
-#include <llvm-c/Core.h>
-#include <llvm-c/Types.h>
-
-#include <llvm/ADT/BitVector.h>
-#include <llvm/ADT/SparseBitVector.h>
-#include <llvm/ADT/PostOrderIterator.h>
-#include <llvm/ADT/SetVector.h>
-#include <llvm/ADT/SmallVector.h>
-#include <llvm/ADT/SmallSet.h>
-#include <llvm/Analysis/CFG.h>
-#include <llvm/Analysis/InstSimplifyFolder.h>
-#include <llvm/IR/Value.h>
-#include <llvm/IR/Constants.h>
-#include <llvm/IR/Dominators.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Instructions.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/MDBuilder.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/ModuleSlotTracker.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/Verifier.h>
-#include <llvm/Pass.h>
-#include <llvm/Support/Debug.h>
-#include <llvm/Transforms/Utils/BasicBlockUtils.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-#include <llvm/Analysis/DomTreeUpdater.h>
-
-#include <llvm/InitializePasses.h>
-
-#include "llvm-codegen-shared.h"
-#include "julia.h"
-#include "julia_internal.h"
-#include "julia_assert.h"
-#include "llvm-pass-helpers.h"
-#include <map>
-#include <string>
+#include "llvm-gc-interface-passes.h"
 
 #define DEBUG_TYPE "late_lower_gcroot"
 
-using namespace llvm;
-
-/* Julia GC Root Placement pass. For a general overview of the design of GC
-   root lowering, see the devdocs. This file is the actual implementation.
-
-   The actual algorithm is fairly straightforward. First recall the goal of this
-   pass:
-
-   Minimize the number of needed gc roots/stores to them subject to the constraint
-   that at every safepoint, any live gc-tracked pointer (i.e. for which there is
-   a path after this point that contains a use of this pointer) is in some gc slot.
-
-   In particular, in order to understand this algorithm, it is important to
-   realize that the only places where rootedness matters is at safepoints.
-
-   Now, the primary phases of the algorithm are:
-
-   1. Local Scan
-
-      During this step, each Basic Block is inspected and analyzed for local
-      properties. In particular, we want to determine the ordering of any of
-      the following activities:
-
-        - Any Def of a gc-tracked pointer. In general Defs are the results of
-          calls or loads from appropriate memory locations. Phi nodes and
-          selects do complicate this story slightly as described below.
-        - Any use of a gc-tracked or derived pointer. As described in the
-          devdocs, a use is in general one of
-              a) a load from a tracked/derived value
-              b) a store to a tracked/derived value
-              c) a store OF a tracked/derived value
-              d) a use of a value as a call operand (including operand bundles)
-        - Any safepoint
-
-      Crucially, we also perform pointer numbering during the local scan,
-      assigning every Def a unique integer and caching the integer for each
-      derived pointer. This allows us to operate only on the set of Defs (
-      represented by these integers) for the rest of the algorithm. We also
-      maintain some local utility information that is needed by later passes
-      (see the BBState struct for details).
-
-    2. Dataflow Computation
-
-      This computation operates entirely over the function's control flow graph
-      and does not look into a basic block. The algorithm is essentially
-      textbook iterative data flow for liveness computation. However, the
-      data flow equations are slightly more complicated because we also
-      forward propagate rootedness information in addition to backpropagating
-      liveness.
-
-    3. Live Set Computation
-
-      With the liveness information from the previous step, we can now compute,
-      for every safepoint, the set of values live at that particular safepoint.
-      There are three pieces of information being combined here:
-           i. Values that needed to be live due to local analysis (e.g. there
-              was a def, then a safepoint, then a use). This was computed during
-              local analysis.
-          ii. Values that are live across the basic block (i.e. they are live
-              at every safepoint within the basic block). This relies entirely
-              on the liveness information.
-         iii. Values that are now live-out from the basic block (i.e. they are
-              live at every safepoint following their def). During local
-              analysis, we keep, for every safepoint, those values that would
-              be live if they were live out. Here we can check if they are
-              actually live-out and make the appropriate additions to the live
-              set.
-
-       Lastly, we also explicitly compute, for each value, the list of values
-       that are simultaneously live at some safepoint. This is known as an
-       "interference graph" and is the input to the next step.
-
-    4. GC Root coloring
-
-      Two values which are not simultaneously live at a safepoint can share the
-      same slot. This is an important optimization, because otherwise long
-      functions would have exceptionally large GC slots, reducing performance
-      and bloating the size of the stack. Assigning values to these slots is
-      equivalent to doing graph coloring on the interference graph - the graph
-      where nodes are values and two values have an edge if they are
-      simultaneously live at a safepoint - which we computed in the previous
-      step. Now graph coloring in general is a hard problem. However, for SSA
-      form programs, (and most programs in general, by virtue of their
-      structure), the resulting interference graphs are chordal and can be
-      colored optimally in linear time by performing greedy coloring in a
-      perfect elimination order. Now, our interference graphs are likely not
-      entirely chordal due to some non-SSA corner cases. However, using the same
-      algorithm should still give a very good coloring while having sufficiently
-      low runtime.
-
-    5. JLCall frame optimizations
-
-      Unlike earlier iterations of the gc root placement logic, jlcall frames
-      are no longer treated as a special case and need not necessarily be sunk
-      into the gc frame. Additionally, we now emit lifetime
-      intrinsics, so regular stack slot coloring will merge any jlcall frames
-      not sunk into the gc frame. Nevertheless performing such sinking can still
-      be profitable. Since all arguments to a jlcall are guaranteed to be live
-      at that call in some gc slot, we can attempt to rearrange the slots within
-      the gc-frame, or reuse slots not assigned at that particular location
-      for the gcframe. However, even without this optimization, stack frames
-      are at most two times larger than optimal (because regular stack coloring
-      can merge the jlcall allocas).
-
-      N.B.: This step is not yet implemented.
-
-    6. Root placement
-
-      This performs the actual insertion of the GCFrame pushes/pops, zeros out
-      the gc frame and creates the stores to the gc frame according to the
-      stack slot assignment computed in the previous step. GC frames stores
-      are generally sunk right before the first safe point that use them
-      (this is beneficial for code where the primary path does not have
-      safepoints, but some other path - e.g. the error path does). However,
-      if the first safepoint is not dominated by the definition (this can
-      happen due to the non-ssa corner cases), the store is inserted right after
-      the definition.
-
-    7. Cleanup
-
-      This step performs necessary cleanup before passing the IR to codegen. In
-      particular, it removes any calls to julia_from_objref intrinsics and
-      removes the extra operand bundles from ccalls. In the future it could
-      also strip the addrspace information from all values as this
-      information is no longer needed.
-
-
-  There are a couple important special cases that deserve special attention:
-
-    A. PHIs and Selects
-
-      In general PHIs and selects are treated as separate defs for the purposes
-      of the algorithm and their operands as uses of those values. It is
-      important to consider however WHERE the uses of PHI's operands are
-      located. It is neither at the start of the basic block, because the values
-      do not dominate the block (so can't really consider them live-in), nor
-      at the end of the predecessor (because they are actually live out).
-      Instead it is best to think of those uses as living on the edge between
-      the appropriate predecessor and the block containing the PHI.
-
-      Another concern is PHIs of derived values. Since we cannot simply root
-      these values by storing them to a GC slot, we need to insert a new,
-      artificial PHI that tracks the base pointers for the derived values. E.g.
-      in:
-
-      A:
-        %Abase = load addrspace(10) *...
-        %Aderived = addrspacecast %Abase to addrspace(11)
-      B:
-        %Bbase = load addrspace(10) *...
-        %Bderived = addrspacecast %Bbase to addrspace(11)
-      C:
-        %phi = phi [%Aderived, %A
-                    %Bderived, %B]
-
-      we will insert another phi in C to track the relevant base pointers:
-
-        %philift = phi [%Abase, %A
-                        %Bbase, %B]
-
-      We then pretend, for the purposes of numbering that %phi was derived from
-      %philift. Note that in order to be able to do this, we need to be able to
-      perform this lifting either during numbering or instruction scanning.
-
-    B. Vectors of pointers/Union representations
-
-      Since this pass runs very late in the pass pipeline, it runs after the
-      various vectorization passes. As a result, we have to potentially deal
-      with vectors of gc-tracked pointers. For the purposes of most of the
-      algorithm, we simply assign every element of the vector a separate number
-      and no changes are needed. However, those parts of the algorithm that
-      look at IR need to be aware of the possibility of encountering vectors of
-      pointers.
-
-      Similarly, unions (e.g. in call returns) are represented as a struct of
-      a gc-tracked value and an argument selector. We simply assign a single
-      number to this struct and proceed as if it was a single pointer. However,
-      this again requires care at the IR level.
-
-    C. Non mem2reg'd allocas
-
-      Under some circumstances, allocas will still be present in the IR when
-      we get to this pass. We don't try very hard to handle this case, and
-      simply sink the alloca into the GCFrame.
-*/
-
-// 4096 bits == 64 words (64 bit words). Larger bit numbers are faster and doing something
-// substantially smaller here doesn't actually save much memory because of malloc overhead.
-// Too large is bad also though - 4096 was found to be a reasonable middle ground.
-using LargeSparseBitVector = SparseBitVector<4096>;
-
-struct BBState {
-    // Uses in this BB
-    // These do not get updated after local analysis
-    LargeSparseBitVector Defs;
-    LargeSparseBitVector PhiOuts;
-    LargeSparseBitVector UpExposedUses;
-    // These get updated during dataflow
-    LargeSparseBitVector LiveIn;
-    LargeSparseBitVector LiveOut;
-    SmallVector<int, 0> Safepoints;
-    int TopmostSafepoint = -1;
-    bool HasSafepoint = false;
-    // Have we gone through this basic block in our local scan yet?
-    bool Done = false;
-};
-
-struct State {
-    Function *const F;
-    DominatorTree *DT;
-
-    // The maximum assigned value number
-    int MaxPtrNumber;
-    // The maximum assigned safepoint number
-    int MaxSafepointNumber;
-    // Cache of numbers assigned to IR values. This includes caching of numbers
-    // for derived values
-    std::map<Value *, int> AllPtrNumbering;
-    std::map<Value *, SmallVector<int, 0>> AllCompositeNumbering;
-    // The reverse of the previous maps
-    std::map<int, Value *> ReversePtrNumbering;
-    // Neighbors in the coloring interference graph. I.e. for each value, the
-    // indices of other values that are used simultaneously at some safe point.
-    SmallVector<LargeSparseBitVector, 0> Neighbors;
-    // The result of the local analysis
-    std::map<const BasicBlock *, BBState> BBStates;
-
-    // Refinement map. If all of the values are rooted
-    // (-1 means an externally rooted value and -2 means a globally/permanently rooted value),
-    // the key is already rooted (but not the other way around).
-    // A value that can be refined to -2 never need any rooting or write barrier.
-    // A value that can be refined to -1 don't need local root but still need write barrier.
-    // At the end of `LocalScan` this map has a few properties
-    // 1. Values are either < 0 or dominates the key
-    // 2. Therefore this is a DAG
-    std::map<int, SmallVector<int, 1>> Refinements;
-
-    // GC preserves map. All safepoints dominated by the map key, but not any
-    // of its uses need to preserve the values listed in the map value.
-    std::map<Instruction *, SmallVector<int, 0>> GCPreserves;
-
-    // The assignment of numbers to safepoints. The indices in the map
-    // are indices into the next three maps which store safepoint properties
-    std::map<Instruction *, int> SafepointNumbering;
-
-    // Reverse mapping index -> safepoint
-    SmallVector<Instruction *, 0> ReverseSafepointNumbering;
-
-    // Instructions that can return twice. For now, all values live at these
-    // instructions will get their own, dedicated GC frame slots, because they
-    // have unobservable control flow, so we can't be sure where they're
-    // actually live. All of these are also considered safepoints.
-    SmallVector<Instruction *, 0> ReturnsTwice;
-
-    // The set of values live at a particular safepoint
-    SmallVector< LargeSparseBitVector , 0> LiveSets;
-    // Those values that - if live out from our parent basic block - are live
-    // at this safepoint.
-    SmallVector<SmallVector<int, 0>> LiveIfLiveOut;
-    // The set of values that are kept alive by the callee.
-    SmallVector<SmallVector<int, 0>> CalleeRoots;
-    // We don't bother doing liveness on Allocas that were not mem2reg'ed.
-    // they just get directly sunk into the root array.
-    SmallVector<AllocaInst *, 0> Allocas;
-    DenseMap<AllocaInst *, unsigned> ArrayAllocas;
-    DenseMap<AllocaInst *, AllocaInst *> ShadowAllocas;
-    SmallVector<std::pair<StoreInst *, unsigned>, 0> TrackedStores;
-    State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
-};
-
-
-struct LateLowerGCFrame:  private JuliaPassContext {
-    function_ref<DominatorTree &()> GetDT;
-    LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
-
-public:
-    bool runOnFunction(Function &F, bool *CFGModified = nullptr);
-
-private:
-    CallInst *pgcstack;
-    Function *poolAllocFunc;
-
-    void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const ArrayRef<int> &SafepointsSoFar,
-                      SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
-    void NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses);
-    void NoteUse(State &S, BBState &BBS, Value *V) {
-        NoteUse(S, BBS, V, BBS.UpExposedUses);
-    }
-
-    void LiftPhi(State &S, PHINode *Phi);
-    void LiftSelect(State &S, SelectInst *SI);
-    Value *MaybeExtractScalar(State &S, std::pair<Value*,int> ValExpr, Instruction *InsertBefore);
-    SmallVector<Value*, 0> MaybeExtractVector(State &S, Value *BaseVec, Instruction *InsertBefore);
-    Value *GetPtrForNumber(State &S, unsigned Num, Instruction *InsertBefore);
-
-    int Number(State &S, Value *V);
-    int NumberBase(State &S, Value *Base);
-    SmallVector<int, 0> NumberAll(State &S, Value *V);
-    SmallVector<int, 0> NumberAllBase(State &S, Value *Base);
-
-    void NoteOperandUses(State &S, BBState &BBS, User &UI);
-    void MaybeTrackDst(State &S, MemTransferInst *MI);
-    void MaybeTrackStore(State &S, StoreInst *I);
-    State LocalScan(Function &F);
-    void ComputeLiveness(State &S);
-    void ComputeLiveSets(State &S);
-    SmallVector<int, 0> ColorRoots(const State &S);
-    void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame, Instruction *InsertBefore);
-    void PlaceGCFrameStores(State &S, unsigned MinColorRoot, ArrayRef<int> Colors, Value *GCFrame);
-    void PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
-    bool CleanupIR(Function &F, State *S, bool *CFGModified);
-    void NoteUseChain(State &S, BBState &BBS, User *TheUser);
-    SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
-    void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
-    void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots);
-    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
-    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
-
-#ifdef MMTK_GC
-    Value* lowerGCAllocBytesLate(CallInst *target, Function &F, State &S);
-#endif
-};
-
 static unsigned getValueAddrSpace(Value *V) {
     return V->getType()->getPointerAddressSpace();
 }
@@ -713,15 +350,7 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
                     ConstantInt::get(Type::getInt32Ty(Cond->getContext()), i),
                     "", SI);
         }
-        #if JL_LLVM_VERSION >= 170000
         assert(FalseElem->getType() == TrueElem->getType());
-        #else
-        if (FalseElem->getType() != TrueElem->getType()) {
-            // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-            assert(FalseElem->getContext().supportsTypedPointers());
-            FalseElem = new BitCastInst(FalseElem, TrueElem->getType(), "", SI);
-        }
-        #endif
         SelectInst *SelectBase = SelectInst::Create(Cond, TrueElem, FalseElem, "gclift", SI);
         int Number = ++S.MaxPtrNumber;
         S.AllPtrNumbering[SelectBase] = Number;
@@ -790,33 +419,7 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
                 BaseElem = Base;
             else
                 BaseElem = IncomingBases[i];
-            #if JL_LLVM_VERSION >= 170000
             assert(BaseElem->getType() == T_prjlvalue);
-            #else
-            if (BaseElem->getType() != T_prjlvalue) {
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(BaseElem->getContext().supportsTypedPointers());
-                auto &remap = CastedRoots[i][BaseElem];
-                if (!remap) {
-                    if (auto constant = dyn_cast<Constant>(BaseElem)) {
-                        remap = ConstantExpr::getBitCast(constant, T_prjlvalue, "");
-                    } else {
-                        Instruction *InsertBefore;
-                        if (auto arg = dyn_cast<Argument>(BaseElem)) {
-                            InsertBefore = &*arg->getParent()->getEntryBlock().getFirstInsertionPt();
-                        } else {
-                            assert(isa<Instruction>(BaseElem) && "Unknown value type detected!");
-                            InsertBefore = cast<Instruction>(BaseElem)->getNextNonDebugInstruction();
-                        }
-                        while (isa<PHINode>(InsertBefore)) {
-                            InsertBefore = InsertBefore->getNextNonDebugInstruction();
-                        }
-                        remap = new BitCastInst(BaseElem, T_prjlvalue, "", InsertBefore);
-                    }
-                }
-                BaseElem = remap;
-            }
-            #endif
             lift->addIncoming(BaseElem, IncomingBB);
         }
     }
@@ -1092,8 +695,15 @@ static int NoteSafepoint(State &S, BBState &BBS, CallInst *CI, SmallVectorImpl<i
     return Number;
 }
 
-void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses) {
+void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitVector &Uses, Function &F) {
     // Short circuit to avoid having to deal with vectors of constants, etc.
+//#ifndef NDEBUG
+//    if (isa<PointerType>(V->getType())) {
+//        if (isSpecialPtr(V->getType()))
+//            if (isa<UndefValue>(V) && !isa<PoisonValue>(V))
+//                F.dump();
+//    }
+//#endif
     if (isa<Constant>(V))
         return;
     if (isa<PointerType>(V->getType())) {
@@ -1115,9 +725,9 @@ void LateLowerGCFrame::NoteUse(State &S, BBState &BBS, Value *V, LargeSparseBitV
     }
 }
 
-void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, User &UI) {
+void LateLowerGCFrame::NoteOperandUses(State &S, BBState &BBS, Instruction &UI) {
     for (Use &U : UI.operands()) {
-        NoteUse(S, BBS, U);
+        NoteUse(S, BBS, U, *UI.getFunction());
     }
 }
 
@@ -1774,7 +1384,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     unsigned nIncoming = Phi->getNumIncomingValues();
                     for (unsigned i = 0; i < nIncoming; ++i) {
                         BBState &IncomingBBS = S.BBStates[Phi->getIncomingBlock(i)];
-                        NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts);
+                        NoteUse(S, IncomingBBS, Phi->getIncomingValue(i), IncomingBBS.PhiOuts, F);
                     }
                 } else if (tracked.count) {
                     // We need to insert extra phis for the GC roots
@@ -1800,7 +1410,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
             } else if (auto *AI = dyn_cast<AllocaInst>(&I)) {
                 Type *ElT = AI->getAllocatedType();
                 if (AI->isStaticAlloca() && isa<PointerType>(ElT) && ElT->getPointerAddressSpace() == AddressSpace::Tracked) {
-                    S.Allocas.push_back(AI);
+                    S.ArrayAllocas[AI] = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
                 }
             }
         }
@@ -1891,21 +1501,17 @@ SmallVector<Value*, 0> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, I
     return Ptrs;
 }
 
-unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) {
-    auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
-    for (unsigned i = 0; i < Ptrs.size(); ++i) {
-        Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*`
-        Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i);
-        #if JL_LLVM_VERSION < 170000
-        assert(cast<PointerType>(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy));
-        #endif
-        StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
-        shadowStore->setOrdering(AtomicOrdering::NotAtomic);
-        // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    }
-    return Ptrs.size();
-}
-
+//static unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
+//    auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
+//    for (unsigned i = 0; i < Ptrs.size(); ++i) {
+//        Value *Elem = Ptrs[i];
+//        Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(irbuilder.getInt8Ty(), Dst, i * sizeof(void*));
+//        StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
+//        shadowStore->setOrdering(AtomicOrdering::NotAtomic);
+//        // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
+//    }
+//    return Ptrs.size();
+//}
 
 // turn a memcpy into a set of loads
 void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI) {
@@ -2326,6 +1932,112 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
     return MDBuilder(Tag->getContext()).createMutableTBAAAccessTag(Tag);
 }
 
+void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
+    auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
+    for (auto CI : WriteBarriers) {
+        auto parent = CI->getArgOperand(0);
+        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
+                    [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
+            CI->eraseFromParent();
+            continue;
+        }
+        if (CFGModified) {
+            *CFGModified = true;
+        }
+
+        IRBuilder<> builder(CI);
+        builder.SetCurrentDebugLocation(CI->getDebugLoc());
+#ifndef MMTK_GC
+        auto DebugInfoMeta = F.getParent()->getModuleFlag("julia.debug_level");
+        int debug_info = 1;
+        if (DebugInfoMeta != nullptr) {
+            debug_info = cast<ConstantInt>(cast<ConstantAsMetadata>(DebugInfoMeta)->getValue())->getZExtValue();
+        }
+        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED);
+        setName(parBits, "parent_bits", debug_info);
+        auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED));
+        setName(parOldMarked, "parent_old_marked", debug_info);
+        auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
+        builder.SetInsertPoint(mayTrigTerm);
+        setName(mayTrigTerm->getParent(), "may_trigger_wb", debug_info);
+        Value *anyChldNotMarked = NULL;
+        for (unsigned i = 1; i < CI->arg_size(); i++) {
+            Value *child = CI->getArgOperand(i);
+            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED);
+            setName(chldBit, "child_bit", debug_info);
+            Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0),"child_not_marked");
+            setName(chldNotMarked, "child_not_marked", debug_info);
+            anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
+        }
+        assert(anyChldNotMarked); // handled by all_of test above
+        MDBuilder MDB(parent->getContext());
+        SmallVector<uint32_t, 2> Weights{1, 9};
+        auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
+                                                  MDB.createBranchWeights(Weights));
+        setName(trigTerm->getParent(), "trigger_wb", debug_info);
+        builder.SetInsertPoint(trigTerm);
+        if (CI->getCalledOperand() == write_barrier_func) {
+            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
+        }
+        else {
+            assert(false);
+        }
+#else
+        // FIXME: Currently we call write barrier with the src object (parent).
+        // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all.
+        // But for other MMTk plans, we need to be careful.
+        const bool INLINE_WRITE_BARRIER = true;
+        if (CI->getCalledOperand() == write_barrier_func) {
+            if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
+                if (INLINE_WRITE_BARRIER) {
+                    auto i8_ty = Type::getInt8Ty(F.getContext());
+                    auto intptr_ty = T_size;
+
+                    // intptr_t addr = (intptr_t) (void*) src;
+                    // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
+                    intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
+                    auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
+                    auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
+
+                    auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
+                    auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
+                    auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
+
+                    // intptr_t shift = (addr >> 3) & 0b111;
+                    auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
+                    auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
+
+                    // uint8_t byte_val = *meta_addr;
+                    auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
+
+                    // if (((byte_val >> shift) & 1) == 1) {
+                    auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
+                    auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
+                    auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
+
+                    // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
+                    MDBuilder MDB(F.getContext());
+                    SmallVector<uint32_t, 2> Weights{1, 9};
+                    if (!S->DT) {
+                        S->DT = &GetDT();
+                    }
+                    DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
+                    auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
+                    builder.SetInsertPoint(mayTriggerSlowpath);
+                    builder.CreateCall(getOrDeclare(jl_intrinsics::writeBarrier1Slow), { parent });
+                } else {
+                    Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
+                    builder.CreateCall(wb_func, { parent });
+                }
+            }
+        } else {
+            assert(false);
+        }
+#endif
+        CI->eraseFromParent();
+    }
+}
+
 bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
     auto T_int32 = Type::getInt32Ty(F.getContext());
     auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
@@ -2443,7 +2155,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
                 // the type tag. (Note that if the size is not a constant, it will call
                 // gc_alloc_obj, and will redundantly set the tag.)
                 auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
-                auto ptls = get_current_ptls_from_task(builder, T_size, CI->getArgOperand(0), tbaa_gcframe);
+                auto ptls = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
                 auto newI = builder.CreateCall(
                     allocBytesIntrinsic,
                     {
@@ -2571,109 +2283,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
             ChangesMade = true;
         }
     }
-    for (auto CI : write_barriers) {
-        auto parent = CI->getArgOperand(0);
-        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
-                    [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
-            CI->eraseFromParent();
-            continue;
-        }
-        if (CFGModified) {
-            *CFGModified = true;
-        }
-
-        IRBuilder<> builder(CI);
-        builder.SetCurrentDebugLocation(CI->getDebugLoc());
-#ifndef MMTK_GC
-        auto DebugInfoMeta = F.getParent()->getModuleFlag("julia.debug_level");
-        int debug_info = 1;
-        if (DebugInfoMeta != nullptr) {
-            debug_info = cast<ConstantInt>(cast<ConstantAsMetadata>(DebugInfoMeta)->getValue())->getZExtValue();
-        }
-        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED);
-        setName(parBits, "parent_bits", debug_info);
-        auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED));
-        setName(parOldMarked, "parent_old_marked", debug_info);
-        auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
-        builder.SetInsertPoint(mayTrigTerm);
-        setName(mayTrigTerm->getParent(), "may_trigger_wb", debug_info);
-        Value *anyChldNotMarked = NULL;
-        for (unsigned i = 1; i < CI->arg_size(); i++) {
-            Value *child = CI->getArgOperand(i);
-            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED);
-            setName(chldBit, "child_bit", debug_info);
-            Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0),"child_not_marked");
-            setName(chldNotMarked, "child_not_marked", debug_info);
-            anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
-        }
-        assert(anyChldNotMarked); // handled by all_of test above
-        MDBuilder MDB(parent->getContext());
-        SmallVector<uint32_t, 2> Weights{1, 9};
-        auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
-                                                  MDB.createBranchWeights(Weights));
-        setName(trigTerm->getParent(), "trigger_wb", debug_info);
-        builder.SetInsertPoint(trigTerm);
-        if (CI->getCalledOperand() == write_barrier_func) {
-            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
-        }
-        else {
-            assert(false);
-        }
-#else
-        // FIXME: Currently we call write barrier with the src object (parent).
-        // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all.
-        // But for other MMTk plans, we need to be careful.
-        const bool INLINE_WRITE_BARRIER = true;
-        if (CI->getCalledOperand() == write_barrier_func) {
-            if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
-                if (INLINE_WRITE_BARRIER) {
-                    auto i8_ty = Type::getInt8Ty(F.getContext());
-                    auto intptr_ty = T_size;
-
-                    // intptr_t addr = (intptr_t) (void*) src;
-                    // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
-                    intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
-                    auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
-                    auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
-
-                    auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
-                    auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
-                    auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
-
-                    // intptr_t shift = (addr >> 3) & 0b111;
-                    auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
-                    auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
-
-                    // uint8_t byte_val = *meta_addr;
-                    auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
-
-                    // if (((byte_val >> shift) & 1) == 1) {
-                    auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
-                    auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
-                    auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
-
-                    // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
-                    MDBuilder MDB(F.getContext());
-                    SmallVector<uint32_t, 2> Weights{1, 9};
-                    if (!S->DT) {
-                        S->DT = &GetDT();
-                    }
-                    DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
-                    auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
-                    builder.SetInsertPoint(mayTriggerSlowpath);
-                    builder.CreateCall(getOrDeclare(jl_intrinsics::writeBarrier1Slow), { parent });
-                } else {
-                    Function *wb_func = getOrDeclare(jl_intrinsics::writeBarrier1);
-                    builder.CreateCall(wb_func, { parent });
-                }
-            }
-        } else {
-            assert(false);
-        }
-#endif
-
-        CI->eraseFromParent();
-    }
+    CleanupWriteBarriers(F, S, write_barriers, CFGModified);
     if (maxframeargs == 0 && Frame) {
         Frame->eraseFromParent();
     }
@@ -2731,15 +2341,7 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
     // Pointee types don't have semantics, so the optimizer is
     // free to rewrite them if convenient. We need to change
     // it back here for the store.
-    #if JL_LLVM_VERSION >= 170000
     assert(Val->getType() == T_prjlvalue);
-    #else
-    if (Val->getType() != T_prjlvalue) {
-        // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-        assert(Val->getContext().supportsTypedPointers());
-        Val = new BitCastInst(Val, T_prjlvalue, "", InsertBefore);
-    }
-    #endif
     new StoreInst(Val, slotAddress, InsertBefore);
 }
 
@@ -2778,7 +2380,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
             MaxColor = C;
 
     // Insert instructions for the actual gc frame
-    if (MaxColor != -1 || !S.Allocas.empty() || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
+    if (MaxColor != -1 || !S.ArrayAllocas.empty() || !S.TrackedStores.empty()) {
         // Create and push a GC frame.
         auto gcframe = CallInst::Create(
             getOrDeclare(jl_intrinsics::newGCFrame),
@@ -2791,6 +2393,43 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
             {gcframe, ConstantInt::get(T_int32, 0)});
         pushGcframe->insertAfter(pgcstack);
 
+        // we don't run memsetopt after this, so run a basic approximation of it
+        // that removes any redundant memset calls in the prologue since getGCFrameSlot already includes the null store
+        Instruction *toerase = nullptr;
+        for (auto &I : F->getEntryBlock()) {
+            if (toerase)
+                toerase->eraseFromParent();
+            toerase = nullptr;
+            Value *ptr;
+            Value *value;
+            bool isvolatile;
+            if (auto *SI = dyn_cast<StoreInst>(&I)) {
+                ptr = SI->getPointerOperand();
+                value = SI->getValueOperand();
+                isvolatile = SI->isVolatile();
+            }
+            else if (auto *MSI = dyn_cast<MemSetInst>(&I)) {
+                ptr = MSI->getDest();
+                value = MSI->getValue();
+                isvolatile = MSI->isVolatile();
+            }
+            else {
+                continue;
+            }
+            ptr = ptr->stripInBoundsOffsets();
+            AllocaInst *AI = dyn_cast<AllocaInst>(ptr);
+            if (isa<GetElementPtrInst>(ptr))
+                break;
+            if (!S.ArrayAllocas.count(AI))
+                continue;
+            if (isvolatile || !isa<Constant>(value) || !cast<Constant>(value)->isNullValue())
+                break; // stop once we reach a pointer operation that couldn't be analyzed or isn't a null store
+            toerase = &I;
+        }
+        if (toerase)
+            toerase->eraseFromParent();
+        toerase = nullptr;
+
         // Replace Allocas
         unsigned AllocaSlot = 2; // first two words are metadata
         auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) {
@@ -2819,27 +2458,11 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
             for (CallInst *II : ToDelete) {
                 II->eraseFromParent();
             }
-            #if JL_LLVM_VERSION >= 170000
             assert(slotAddress->getType() == AI->getType());
-            #else
-            if (slotAddress->getType() != AI->getType()) {
-                // If we're replacing an ArrayAlloca, the pointer element type may need to be fixed up
-                // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                assert(slotAddress->getContext().supportsTypedPointers());
-                auto BCI  = new BitCastInst(slotAddress, AI->getType());
-                BCI->insertAfter(slotAddress);
-                slotAddress = BCI;
-            }
-            #endif
             AI->replaceAllUsesWith(slotAddress);
             AI->eraseFromParent();
             AI = NULL;
         };
-        for (AllocaInst *AI : S.Allocas) {
-            auto ns = cast<ConstantInt>(AI->getArraySize())->getZExtValue();
-            replace_alloca(AI);
-            AllocaSlot += ns;
-        }
         for (auto AI : S.ArrayAllocas) {
             replace_alloca(AI.first);
             AllocaSlot += AI.second;
@@ -2855,15 +2478,7 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
                 slotAddress->insertAfter(gcframe);
                 auto ValExpr = std::make_pair(Base, isa<PointerType>(Base->getType()) ? -1 : i);
                 auto Elem = MaybeExtractScalar(S, ValExpr, SI);
-                #if JL_LLVM_VERSION >= 170000
                 assert(Elem->getType() == T_prjlvalue);
-                #else
-                if (Elem->getType() != T_prjlvalue) {
-                    // Shouldn't get here when using opaque pointers, so the new BitCastInst is fine
-                    assert(Elem->getContext().supportsTypedPointers());
-                    Elem = new BitCastInst(Elem, T_prjlvalue, "", SI);
-                }
-                #endif
                 //auto Idxs = ArrayRef<unsigned>(Tracked[i]);
                 //Value *Elem = ExtractScalar(Base, true, Idxs, SI);
                 Value *shadowStore = new StoreInst(Elem, slotAddress, SI);
@@ -2890,7 +2505,8 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(SmallVectorImpl<int> &Colors, St
     }
 }
 
-Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F, State &S)
+#ifdef MMTK_GC
+Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
 {
     assert(target->arg_size() == 3);
 
@@ -2903,7 +2519,9 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F, St
         int osize;
         int offset = jl_gc_classify_pools(sz, &osize);
         if (offset >= 0) {
-            // In this case julia.gc_alloc_bytes will simply become a call to jl_gc_pool_alloc in the final GC lowering pass
+            // In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc
+            // We do a slowpath/fastpath check and lower it only on the slowpath, returning
+            // the cursor and updating it in the fastpath.
             auto pool_osize_i32 = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
             auto pool_osize = ConstantInt::get(Type::getInt64Ty(F.getContext()), osize);
 
@@ -2914,7 +2532,7 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F, St
             if (INLINE_FASTPATH_ALLOCATION) {
                 // Assuming we use the first immix allocator.
                 // FIXME: We should get the allocator index and type from MMTk.
-                auto allocator_offset = offsetof(jl_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);
+                auto allocator_offset = offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, mmtk_mutator) + offsetof(MMTkMutatorContext, allocators) + offsetof(Allocators, immix);
 
                 auto cursor_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), allocator_offset + offsetof(ImmixAllocator, cursor));
                 auto limit_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()),  allocator_offset + offsetof(ImmixAllocator, limit));
@@ -2943,13 +2561,10 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F, St
                 auto fastpath = BasicBlock::Create(target->getContext(), "fastpath", target->getFunction());
 
                 auto next_instr = target->getNextNode();
-                if (!S.DT) {
-                    S.DT = &GetDT();
-                }
-                DomTreeUpdater dtu = DomTreeUpdater(S.DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
-                MDBuilder MDB(F.getContext());
                 SmallVector<uint32_t, 2> Weights{1, 9};
-                SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights), &dtu);
+
+                MDBuilder MDB(F.getContext());
+                SplitBlockAndInsertIfThenElse(gt_limit, next_instr, &slowpath, &fastpath, false, false, MDB.createBranchWeights(Weights));
 
                 builder.SetInsertPoint(next_instr);
                 auto phiNode = builder.CreatePHI(target->getCalledFunction()->getReturnType(), 2, "phi_fast_slow");
@@ -2957,16 +2572,16 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F, St
                 // slowpath
                 builder.SetInsertPoint(slowpath);
                 auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-                auto new_call = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
+                auto new_call = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize_i32, type });
                 new_call->setAttributes(new_call->getCalledFunction()->getAttributes());
                 builder.CreateBr(next_instr->getParent());
 
-                // // fastpath
+                // fastpath
                 builder.SetInsertPoint(fastpath);
                 builder.CreateStore(new_cursor, cursor_ptr);
 
-                // ptls->gc_num.allocd += osize;
-                auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls) + offsetof(jl_gc_tls_states_t, gc_num));
+                // ptls->gc_tls.gc_num.allocd += osize;
+                auto pool_alloc_pos = ConstantInt::get(Type::getInt64Ty(target->getContext()), offsetof(jl_tls_states_t, gc_tls_common) + offsetof(jl_gc_tls_states_common_t, gc_num));
                 auto pool_alloc_i8 = builder.CreateGEP(Type::getInt8Ty(target->getContext()), ptls, pool_alloc_pos);
                 auto pool_alloc_tls = builder.CreateBitCast(pool_alloc_i8, PointerType::get(Type::getInt64Ty(target->getContext()), 0), "pool_alloc");
                 auto pool_allocd = builder.CreateLoad(Type::getInt64Ty(target->getContext()), pool_alloc_tls);
@@ -2974,7 +2589,7 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F, St
                 builder.CreateStore(pool_allocd_total, pool_alloc_tls);
 
                 auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t)));
-                auto v_as_ptr = builder.CreateIntToPtr(v_raw, poolAllocFunc->getReturnType());
+                auto v_as_ptr = builder.CreateIntToPtr(v_raw, smallAllocFunc->getReturnType());
                 builder.CreateBr(next_instr->getParent());
 
                 phiNode->addIncoming(new_call, slowpath);
@@ -3001,10 +2616,11 @@ static void replaceInstruction(
         ++it;
     }
 }
+#endif
 
 bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
     initAll(*F.getParent());
-    poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
+    smallAllocFunc = getOrDeclare(jl_well_known::GCSmallAlloc);
     LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
     if (!pgcstack_getter && !adoptthread_func)
         return CleanupIR(F, nullptr, CFGModified);
@@ -3035,13 +2651,15 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
 
             auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes);
             if (GCAllocBytes == callee) {
-                replaceInstruction(CI, lowerGCAllocBytesLate(CI, F, S), it);
+                *CFGModified = true;
+                replaceInstruction(CI, lowerGCAllocBytesLate(CI, F), it);
                 continue;
             }
             ++it;
         }
     }
 #endif
+
     return true;
 }
 
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index 08600e24490b1..d544f182637b9 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -100,11 +100,11 @@ static uint32_t collect_func_info(Function &F, const Triple &TT, bool &has_vecca
                 }
                 if (auto callee = call->getCalledFunction()) {
                     auto name = callee->getName();
-                    if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) {
+                    if (name.starts_with("llvm.muladd.") || name.starts_with("llvm.fma.")) {
                         flag |= JL_TARGET_CLONE_MATH;
                     }
-                    else if (name.startswith("julia.cpu.")) {
-                        if (name.startswith("julia.cpu.have_fma.")) {
+                    else if (name.starts_with("julia.cpu.")) {
+                        if (name.starts_with("julia.cpu.have_fma.")) {
                             // for some platforms we know they always do (or don't) support
                             // FMA. in those cases we don't need to clone the function.
                             // always_have_fma returns an optional<bool>
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index 6d6c3898e875c..07a4ba8925996 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -344,7 +344,7 @@ namespace jl_intrinsics {
 
 namespace jl_well_known {
     static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
-    static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
+    static const char *GC_SMALL_ALLOC_NAME = XSTR(jl_gc_small_alloc);
     static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
     static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed);
 #ifdef MMTK_GC
@@ -372,20 +372,20 @@ namespace jl_well_known {
             return addGCAllocAttributes(bigAllocFunc);
         });
 
-    const WellKnownFunctionDescription GCPoolAlloc(
-        GC_POOL_ALLOC_NAME,
+    const WellKnownFunctionDescription GCSmallAlloc(
+        GC_SMALL_ALLOC_NAME,
         [](Type *T_size) {
             auto &ctx = T_size->getContext();
             auto T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
-            auto poolAllocFunc = Function::Create(
+            auto smallAllocFunc = Function::Create(
                 FunctionType::get(
                     T_prjlvalue,
                     { PointerType::get(ctx, 0), Type::getInt32Ty(ctx), Type::getInt32Ty(ctx), T_size },
                     false),
                 Function::ExternalLinkage,
-                GC_POOL_ALLOC_NAME);
-            poolAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
-            return addGCAllocAttributes(poolAllocFunc);
+                GC_SMALL_ALLOC_NAME);
+            smallAllocFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 2, None));
+            return addGCAllocAttributes(smallAllocFunc);
         });
 
     const WellKnownFunctionDescription GCQueueRoot(
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 6b74e0f2c5bb5..e41853bb85dff 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -154,8 +154,8 @@ namespace jl_well_known {
     // `jl_gc_big_alloc`: allocates bytes.
     extern const WellKnownFunctionDescription GCBigAlloc;
 
-    // `jl_gc_pool_alloc`: allocates bytes.
-    extern const WellKnownFunctionDescription GCPoolAlloc;
+    // `jl_gc_small_alloc`: allocates bytes.
+    extern const WellKnownFunctionDescription GCSmallAlloc;
 
     // `jl_gc_queue_root`: queues a GC root.
     extern const WellKnownFunctionDescription GCQueueRoot;
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp
index 9e49aa5ba2f39..488dd46cade21 100644
--- a/src/llvm-ptls.cpp
+++ b/src/llvm-ptls.cpp
@@ -128,7 +128,7 @@ Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefor
             offset = ConstantInt::getSigned(T_size, jl_tls_offset);
         auto tp = InlineAsm::get(FunctionType::get(PointerType::get(builder.getContext(), 0), false), asm_str, "=r", false);
         tls = builder.CreateCall(tp, {}, "thread_ptr");
-        tls = builder.CreateGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack");
+        tls = builder.CreateInBoundsGEP(Type::getInt8Ty(builder.getContext()), tls, {offset}, "tls_ppgcstack");
     }
     return builder.CreateLoad(T_pppjlvalue, tls, "tls_pgcstack");
 }
@@ -191,7 +191,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
         builder.SetInsertPoint(fastTerm->getParent());
         fastTerm->removeFromParent();
         MDNode *tbaa = tbaa_gcframe;
-        Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, pgcstack), tbaa), true);
+        Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true);
         builder.Insert(fastTerm);
         phi->addIncoming(pgcstack, fastTerm->getParent());
         // emit pre-return cleanup
@@ -203,7 +203,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
             for (auto &BB : *pgcstack->getParent()->getParent()) {
                 if (isa<ReturnInst>(BB.getTerminator())) {
                     builder.SetInsertPoint(BB.getTerminator());
-                    emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, T_size, get_current_task_from_pgcstack(builder, T_size, phi), tbaa), last_gc_state, true);
+                    emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true);
                 }
             }
         }
diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp
index f29802b438e1e..07afa8c930deb 100644
--- a/src/llvm-simdloop.cpp
+++ b/src/llvm-simdloop.cpp
@@ -177,7 +177,7 @@ static bool processLoop(Loop &L, OptimizationRemarkEmitter &ORE, ScalarEvolution
         const MDString *S = dyn_cast<MDString>(Op);
         if (S) {
             LLVM_DEBUG(dbgs() << "LSL: found " << S->getString() << "\n");
-            if (S->getString().startswith("julia")) {
+            if (S->getString().starts_with("julia")) {
                 if (S->getString().equals("julia.simdloop"))
                     simd = true;
                 if (S->getString().equals("julia.ivdep"))
diff --git a/src/llvm_api.cpp b/src/llvm_api.cpp
index d56fb3a0497fa..e98c375b711b3 100644
--- a/src/llvm_api.cpp
+++ b/src/llvm_api.cpp
@@ -21,6 +21,7 @@
 #include <llvm/Support/CBindingWrapping.h>
 #include <llvm/Support/MemoryBuffer.h>
 
+#if JL_LLVM_VERSION < 180000
 namespace llvm {
 namespace orc {
 class OrcV2CAPIHelper {
@@ -38,7 +39,7 @@ class OrcV2CAPIHelper {
 };
 } // namespace orc
 } // namespace llvm
-
+#endif
 
 typedef struct JLOpaqueJuliaOJIT *JuliaOJITRef;
 typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef;
@@ -46,8 +47,13 @@ typedef struct LLVMOrcOpaqueIRCompileLayer *LLVMOrcIRCompileLayerRef;
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(JuliaOJIT, JuliaOJITRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::JITDylib, LLVMOrcJITDylibRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::ExecutionSession, LLVMOrcExecutionSessionRef)
+#if JL_LLVM_VERSION >= 180000
+DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::SymbolStringPoolEntryUnsafe::PoolEntry,
+                                   LLVMOrcSymbolStringPoolEntryRef)
+#else
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::OrcV2CAPIHelper::PoolEntry,
                                    LLVMOrcSymbolStringPoolEntryRef)
+#endif
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::IRCompileLayer, LLVMOrcIRCompileLayerRef)
 DEFINE_SIMPLE_CONVERSION_FUNCTIONS(orc::MaterializationResponsibility,
                                    LLVMOrcMaterializationResponsibilityRef)
@@ -113,7 +119,11 @@ JL_DLLEXPORT_CODEGEN LLVMOrcSymbolStringPoolEntryRef
 JLJITMangleAndIntern_impl(JuliaOJITRef JIT,
                                             const char *Name)
 {
+#if JL_LLVM_VERSION >= 180000
+    return wrap(orc::SymbolStringPoolEntryUnsafe::take(unwrap(JIT)->mangle(Name)).rawPtr());
+#else
     return wrap(orc::OrcV2CAPIHelper::moveFromSymbolStringPtr(unwrap(JIT)->mangle(Name)));
+#endif
 }
 
 JL_DLLEXPORT_CODEGEN const char *
diff --git a/src/method.c b/src/method.c
index 549575286bc7e..629816319b334 100644
--- a/src/method.c
+++ b/src/method.c
@@ -237,11 +237,9 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (fe_mod->istopmod && !strcmp(jl_symbol_name(fe_sym), "getproperty") && jl_is_symbol(s)) {
                     if (eager_resolve || jl_binding_resolved_p(me_mod, me_sym)) {
                         jl_binding_t *b = jl_get_binding(me_mod, me_sym);
-                        if (b && b->constp) {
-                            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
-                            if (v && jl_is_module(v))
-                                return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s);
-                        }
+                        jl_value_t *v = jl_get_binding_value_if_const(b);
+                        if (v && jl_is_module(v))
+                            return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s);
                     }
                 }
             }
@@ -254,7 +252,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (jl_binding_resolved_p(fe_mod, fe_sym)) {
                     // look at some known called functions
                     jl_binding_t *b = jl_get_binding(fe_mod, fe_sym);
-                    if (b && b->constp && jl_atomic_load_relaxed(&b->value) == jl_builtin_tuple) {
+                    if (jl_get_binding_value_if_const(b) == jl_builtin_tuple) {
                         size_t j;
                         for (j = 1; j < nargs; j++) {
                             if (!jl_is_quotenode(jl_exprarg(e, j)))
@@ -491,6 +489,10 @@ jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir)
                         if (noub_if_noinbounds) li->purity.overrides.ipo_noub_if_noinbounds = noub_if_noinbounds;
                         int8_t consistent_overlay = jl_unbox_bool(jl_exprarg(ma, 9));
                         if (consistent_overlay) li->purity.overrides.ipo_consistent_overlay = consistent_overlay;
+                        int8_t nortcall = jl_unbox_bool(jl_exprarg(ma, 10));
+                        if (nortcall) li->purity.overrides.ipo_nortcall = nortcall;
+                    } else {
+                        assert(jl_expr_nargs(ma) == 0);
                     }
                 }
                 else
@@ -629,7 +631,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
     mi->backedges = NULL;
     jl_atomic_store_relaxed(&mi->cache, NULL);
     mi->cache_with_orig = 0;
-    jl_atomic_store_relaxed(&mi->precompiled, 0);
+    jl_atomic_store_relaxed(&mi->flags, 0);
     return mi;
 }
 
@@ -1122,29 +1124,24 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name
     return m;
 }
 
-// empty generic function def
-JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
-                                                 jl_module_t *module,
-                                                 _Atomic(jl_value_t*) *bp,
-                                                 jl_binding_t *bnd)
+JL_DLLEXPORT void jl_check_gf(jl_value_t *gf, jl_sym_t *name)
 {
-    jl_value_t *gf = NULL;
-
-    assert(name && bp);
-    if (bnd && jl_atomic_load_relaxed(&bnd->value) != NULL && !bnd->constp)
+    if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
         jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
-    gf = jl_atomic_load_relaxed(bp);
-    if (gf != NULL) {
-        if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
-            jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
-    }
-    if (bnd)
-        bnd->constp = 1; // XXX: use jl_declare_constant and jl_checked_assignment
-    if (gf == NULL) {
-        gf = (jl_value_t*)jl_new_generic_function(name, module);
-        jl_atomic_store(bp, gf); // TODO: fix constp assignment data race
-        if (bnd) jl_gc_wb(bnd, gf);
+}
+
+JL_DLLEXPORT jl_value_t *jl_declare_const_gf(jl_binding_t *b, jl_module_t *mod, jl_sym_t *name)
+{
+    jl_value_t *gf = jl_get_binding_value_if_const(b);
+    if (gf) {
+        jl_check_gf(gf, b->globalref->name);
+        return gf;
     }
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (!jl_bkind_is_some_guard(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction))))
+        jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
+    gf = (jl_value_t*)jl_new_generic_function(name, mod);
+    jl_declare_constant_val(b, mod, name, gf);
     return gf;
 }
 
diff --git a/src/mmtk-gc.c b/src/mmtk-gc.c
deleted file mode 100644
index 5a104c4856c54..0000000000000
--- a/src/mmtk-gc.c
+++ /dev/null
@@ -1,584 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#ifdef MMTK_GC
-
-#include "gc.h"
-#include "mmtk_julia.h"
-#include "julia_gcext.h"
-
-// callbacks
-// ---
-
-typedef void (*jl_gc_cb_func_t)(void);
-
-JL_DLLEXPORT void jl_gc_set_cb_root_scanner(jl_gc_cb_root_scanner_t cb, int enable)
-{
-}
-JL_DLLEXPORT void jl_gc_set_cb_task_scanner(jl_gc_cb_task_scanner_t cb, int enable)
-{
-}
-JL_DLLEXPORT void jl_gc_set_cb_pre_gc(jl_gc_cb_pre_gc_t cb, int enable)
-{
-}
-JL_DLLEXPORT void jl_gc_set_cb_post_gc(jl_gc_cb_post_gc_t cb, int enable)
-{
-}
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_alloc(jl_gc_cb_notify_external_alloc_t cb, int enable)
-{
-}
-JL_DLLEXPORT void jl_gc_set_cb_notify_external_free(jl_gc_cb_notify_external_free_t cb, int enable)
-{
-}
-JL_DLLEXPORT void jl_gc_set_cb_notify_gc_pressure(jl_gc_cb_notify_gc_pressure_t cb, int enable)
-{
-}
-
-// mutex for page profile
-uv_mutex_t page_profile_lock;
-
-JL_DLLEXPORT void jl_gc_take_page_profile(ios_t *stream)
-{
-    uv_mutex_lock(&page_profile_lock);
-    const char *str = "Page profiler in unsupported in MMTk.";
-    ios_write(stream, str, strlen(str));
-    uv_mutex_unlock(&page_profile_lock);
-}
-
-JL_DLLEXPORT double jl_gc_page_utilization_stats[JL_GC_N_MAX_POOLS];
-
-STATIC_INLINE void gc_dump_page_utilization_data(void) JL_NOTSAFEPOINT
-{
-    // FIXME: MMTk would have to provide its own stats
-}
-
-#define MMTK_GC_PAGE_SZ (1 << 12) // MMTk's page size is defined in mmtk-core constants
-
-JL_DLLEXPORT uint64_t jl_get_pg_size(void)
-{
-    return MMTK_GC_PAGE_SZ;
-}
-
-inline void maybe_collect(jl_ptls_t ptls)
-{
-    // Just do a safe point for general maybe_collect
-    jl_gc_safepoint_(ptls);
-}
-
-// This is only used for malloc. We need to know if we need to do GC. However, keeping checking with MMTk (mmtk_gc_poll),
-// is expensive. So we only check for every few allocations.
-static inline void malloc_maybe_collect(jl_ptls_t ptls, size_t sz)
-{
-    // We do not need to carefully maintain malloc_sz_since_last_poll. We just need to
-    // avoid using mmtk_gc_poll too frequently, and try to be precise on our heap usage
-    // as much as we can.
-    if (ptls->malloc_sz_since_last_poll > 4096) {
-        jl_atomic_store_relaxed(&ptls->malloc_sz_since_last_poll, 0);
-        mmtk_gc_poll(ptls);
-    } else {
-        jl_atomic_fetch_add_relaxed(&ptls->malloc_sz_since_last_poll, sz);
-        jl_gc_safepoint_(ptls);
-    }
-}
-
-// allocation
-int jl_gc_classify_pools(size_t sz, int *osize)
-{
-    if (sz > GC_MAX_SZCLASS)
-        return -1; // call big alloc function
-    size_t allocsz = sz + sizeof(jl_taggedvalue_t);
-    *osize = LLT_ALIGN(allocsz, 16);
-    return 0; // use MMTk's fastpath logic
-}
-
-// malloc wrappers, aligned allocation
-// We currently just duplicate what Julia GC does. We will in the future replace the malloc calls with MMTK's malloc.
-
-#if defined(_OS_WINDOWS_)
-inline void *jl_malloc_aligned(size_t sz, size_t align)
-{
-    return _aligned_malloc(sz ? sz : 1, align);
-}
-inline void *jl_realloc_aligned(void *p, size_t sz, size_t oldsz,
-                                       size_t align)
-{
-    (void)oldsz;
-    return _aligned_realloc(p, sz ? sz : 1, align);
-}
-inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    _aligned_free(p);
-}
-#else
-inline void *jl_malloc_aligned(size_t sz, size_t align)
-{
-#if defined(_P64) || defined(__APPLE__)
-    if (align <= 16)
-        return malloc(sz);
-#endif
-    void *ptr;
-    if (posix_memalign(&ptr, align, sz))
-        return NULL;
-    return ptr;
-}
-inline void *jl_realloc_aligned(void *d, size_t sz, size_t oldsz,
-                                       size_t align)
-{
-#if defined(_P64) || defined(__APPLE__)
-    if (align <= 16)
-        return realloc(d, sz);
-#endif
-    void *b = jl_malloc_aligned(sz, align);
-    if (b != NULL) {
-        memcpy(b, d, oldsz > sz ? sz : oldsz);
-        free(d);
-    }
-    return b;
-}
-inline void jl_free_aligned(void *p) JL_NOTSAFEPOINT
-{
-    free(p);
-}
-#endif
-
-// weak references
-// ---
-JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *value)
-{
-    jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type);
-    wr->value = value;  // NOTE: wb not needed here
-    mmtk_add_weak_candidate(wr);
-    return wr;
-}
-
-
-// big values
-// ---
-
-// Size includes the tag and the tag is not cleared!!
-inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
-{
-    // TODO: assertion needed here?
-    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
-    // TODO: drop this okay?
-    // maybe_collect(ptls);
-
-    jl_value_t *v = jl_mmtk_gc_alloc_big(ptls, sz);
-    // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_big; enable
-    // here when that's edited?
-    /*
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
-    jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
-    */
-    // TODO: move to jl_mmtk_gc_alloc_big if needed.
-/*
-#ifdef MEMDEBUG
-    memset(v, 0xee, allocsz);
-#endif
-*/
-    // TODO: need to set this? have to move to jl_mmtk_gc_alloc_big then.
-    // v->age = 0;
-    // TODO: dropping this; confirm okay? `sweep_big` no longer needed?
-    // gc_big_object_link(v, &ptls->heap.big_objects);
-    return v;
-}
-
-// Size includes the tag and the tag is not cleared!!
-inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset, int osize)
-{
-    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
-#ifdef MEMDEBUG
-    return jl_gc_big_alloc(ptls, osize);
-#endif
-    // TODO: drop this okay?
-    // maybe_collect(ptls);
-
-    jl_value_t *v = jl_mmtk_gc_alloc_default(ptls, osize, 16, NULL);
-    // TODO: this is done (without atomic operations) in jl_mmtk_gc_alloc_default; enable
-    // here when that's edited?
-    /*
-    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
-        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize);
-    jl_atomic_store_relaxed(&ptls->gc_num.poolalloc,
-        jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1);
-    */
-   return v;
-}
-
-// roots
-// ---
-
-JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
-{
-    mmtk_unreachable();
-}
-
-// TODO: exported, but not MMTk-specific?
-JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const void *stored, jl_datatype_t *dt) JL_NOTSAFEPOINT
-{
-    mmtk_unreachable();
-}
-
-
-// marking
-// ---
-
-JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
-{
-    mmtk_unreachable();
-    return 0;
-}
-JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
-                                            jl_value_t **objs, size_t nobjs)
-{
-    mmtk_unreachable();
-}
-
-
-// GC control
-// ---
-
-JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
-{
-    jl_task_t *ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    if (jl_atomic_load_acquire(&jl_gc_disable_counter)) {
-        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_tls.gc_num.allocd) + gc_num.interval;
-        jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
-        static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
-        jl_atomic_fetch_add_relaxed((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
-        return;
-    }
-    mmtk_handle_user_collection_request(ptls, collection);
-}
-
-// Per-thread initialization
-// TODO: remove `norm_pools`, `weak_refs`, etc. from `heap`?
-// TODO: remove `gc_cache`?
-void jl_init_thread_heap(jl_ptls_t ptls)
-{
-    jl_thread_heap_t *heap = &ptls->gc_tls.heap;
-    jl_gc_pool_t *p = heap->norm_pools;
-    for (int i = 0; i < JL_GC_N_POOLS; i++) {
-        p[i].osize = jl_gc_sizeclasses[i];
-        p[i].freelist = NULL;
-        p[i].newpages = NULL;
-    }
-    small_arraylist_new(&heap->weak_refs, 0);
-    small_arraylist_new(&heap->live_tasks, 0);
-    for (int i = 0; i < JL_N_STACK_POOLS; i++)
-        small_arraylist_new(&heap->free_stacks[i], 0);
-    heap->mallocarrays = NULL;
-    heap->mafreelist = NULL;
-    heap->big_objects = NULL;
-    arraylist_new(&heap->remset, 0);
-    arraylist_new(&ptls->finalizers, 0);
-    arraylist_new(&ptls->gc_tls.sweep_objs, 0);
-
-    jl_gc_mark_cache_t *gc_cache = &ptls->gc_tls.gc_cache;
-    gc_cache->perm_scanned_bytes = 0;
-    gc_cache->scanned_bytes = 0;
-    gc_cache->nbig_obj = 0;
-
-    memset(&ptls->gc_tls.gc_num, 0, sizeof(ptls->gc_tls.gc_num));
-    jl_atomic_store_relaxed(&ptls->gc_tls.gc_num.allocd, -(int64_t)gc_num.interval);
-
-    // Clear the malloc sz count
-    jl_atomic_store_relaxed(&ptls->malloc_sz_since_last_poll, 0);
-
-    // Create mutator
-    MMTk_Mutator mmtk_mutator = mmtk_bind_mutator((void *)ptls, ptls->tid);
-    // Copy the mutator to the thread local storage
-    memcpy(&ptls->mmtk_mutator, mmtk_mutator, sizeof(MMTkMutatorContext));
-    // Call post_bind to maintain a list of active mutators and to reclaim the old mutator (which is no longer needed)
-    mmtk_post_bind_mutator(&ptls->mmtk_mutator, mmtk_mutator);
-}
-
-void jl_free_thread_gc_state(jl_ptls_t ptls)
-{
-}
-
-void jl_deinit_thread_heap(jl_ptls_t ptls)
-{
-    mmtk_destroy_mutator(&ptls->mmtk_mutator);
-}
-
-extern jl_mutex_t finalizers_lock;
-extern arraylist_t to_finalize;
-extern arraylist_t finalizer_list_marked;
-
-// System-wide initialization
-// TODO: remove locks? remove anything else?
-void jl_gc_init(void)
-{
-    if (jl_options.heap_size_hint)
-        jl_gc_set_max_memory(jl_options.heap_size_hint);
-
-    JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
-    JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
-    uv_mutex_init(&gc_perm_lock);
-
-    arraylist_new(&to_finalize, 0);
-    arraylist_new(&finalizer_list_marked, 0);
-
-    gc_num.interval = default_collect_interval;
-    last_long_collect_interval = default_collect_interval;
-    gc_num.allocd = 0;
-    gc_num.max_pause = 0;
-    gc_num.max_memory = 0;
-
-#ifdef _P64
-    total_mem = uv_get_total_memory();
-    uint64_t constrained_mem = uv_get_constrained_memory();
-    if (constrained_mem > 0 && constrained_mem < total_mem)
-        total_mem = constrained_mem;
-#endif
-
-    // We allocate with abandon until we get close to the free memory on the machine.
-    uint64_t free_mem = uv_get_available_memory();
-    uint64_t high_water_mark = free_mem / 10 * 7;  // 70% high water mark
-
-    if (high_water_mark < max_total_memory)
-       max_total_memory = high_water_mark;
-
-    // MMTk-specific
-    long long min_heap_size;
-    long long max_heap_size;
-    char* min_size_def = getenv("MMTK_MIN_HSIZE");
-    char* min_size_gb = getenv("MMTK_MIN_HSIZE_G");
-
-    char* max_size_def = getenv("MMTK_MAX_HSIZE");
-    char* max_size_gb = getenv("MMTK_MAX_HSIZE_G");
-
-    // default min heap currently set as Julia's default_collect_interval
-    if (min_size_def != NULL) {
-        char *p;
-        double min_size = strtod(min_size_def, &p);
-        min_heap_size = (long) 1024 * 1024 * min_size;
-    } else if (min_size_gb != NULL) {
-        char *p;
-        double min_size = strtod(min_size_gb, &p);
-        min_heap_size = (long) 1024 * 1024 * 1024 * min_size;
-    } else {
-        min_heap_size = default_collect_interval;
-    }
-
-    // default max heap currently set as 70% the free memory in the system
-    if (max_size_def != NULL) {
-        char *p;
-        double max_size = strtod(max_size_def, &p);
-        max_heap_size = (long) 1024 * 1024 * max_size;
-    } else if (max_size_gb != NULL) {
-        char *p;
-        double max_size = strtod(max_size_gb, &p);
-        max_heap_size = (long) 1024 * 1024 * 1024 * max_size;
-    } else {
-        max_heap_size = uv_get_free_memory() * 70 / 100;
-    }
-
-    // Assert that the number of stock GC threads is 0; MMTK uses the number of threads in jl_options.ngcthreads
-    assert(jl_n_gcthreads == 0);
-
-    // Check that the julia_copy_stack rust feature has been defined when the COPY_STACK has been defined
-    int copy_stacks;
-
-#ifdef COPY_STACKS
-    copy_stacks = 1;
-#else
-    copy_stacks = 0;
-#endif
-
-    mmtk_julia_copy_stack_check(copy_stacks);
-
-    // if only max size is specified initialize MMTk with a fixed size heap
-    // TODO: We just assume mark threads means GC threads, and ignore the number of concurrent sweep threads.
-    // If the two values are the same, we can use either. Otherwise, we need to be careful.
-    uintptr_t gcthreads = jl_options.nmarkthreads;
-    if (max_size_def != NULL || (max_size_gb != NULL && (min_size_def == NULL && min_size_gb == NULL))) {
-        mmtk_gc_init(0, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
-    } else {
-        mmtk_gc_init(min_heap_size, max_heap_size, gcthreads, &mmtk_upcalls, (sizeof(jl_taggedvalue_t)), jl_buff_tag);
-    }
-}
-
-// allocation wrappers that track allocation and let collection run
-
-JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
-{
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = malloc(sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        malloc_maybe_collect(ptls, sz);
-        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz);
-    }
-    return data;
-}
-
-JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
-{
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    void *data = calloc(nm, sz);
-    if (data != NULL && pgcstack != NULL && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        malloc_maybe_collect(ptls, nm * sz);
-        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, nm * sz);
-    }
-    return data;
-}
-
-JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
-{
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    free(p);
-    if (pgcstack != NULL && ct->world_age) {
-        jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, -sz);
-    }
-}
-
-JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
-{
-    jl_gcframe_t **pgcstack = jl_get_pgcstack();
-    jl_task_t *ct = jl_current_task;
-    if (pgcstack && ct->world_age) {
-        jl_ptls_t ptls = ct->ptls;
-        malloc_maybe_collect(ptls, sz);
-        if (sz < old)
-            jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, old - sz);
-        else
-            jl_atomic_fetch_add_relaxed(&JULIA_MALLOC_BYTES, sz - old);
-    }
-    return realloc(p, sz);
-}
-
-jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
-{
-    size_t len = jl_string_len(s);
-    jl_value_t *snew = jl_alloc_string(sz);
-    memcpy(jl_string_data(snew), jl_string_data(s), sz <= len ? sz : len);
-    if(mmtk_is_pinned(s)) {
-        // if the source string was pinned, we also pin the new one
-        mmtk_pin_object(snew);
-    }
-    return snew;
-}
-
-JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
-{
-    return 0;
-}
-
-JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void)
-{
-    return 0;
-}
-
-// TODO: if this is needed, it can be added in MMTk
-JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
-{
-    return NULL;
-}
-
-
-// gc-debug functions
-// ---
-
-jl_gc_pagemeta_t *jl_gc_page_metadata(void *data)
-{
-    return NULL;
-}
-
-JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p)
-{
-    return NULL;
-}
-
-void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT
-{
-}
-
-void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT
-{
-    // May not be accurate but should be helpful enough
-    uint64_t pool_count = gc_num.poolalloc;
-    uint64_t big_count = gc_num.bigalloc;
-    jl_safe_printf("Allocations: %" PRIu64 " "
-                   "(Pool: %" PRIu64 "; Big: %" PRIu64 "); GC: %d\n",
-                   pool_count + big_count, pool_count, big_count, gc_num.pause);
-}
-
-void jl_print_gc_stats(JL_STREAM *s)
-{
-}
-
-// gc thread function
-void jl_gc_threadfun(void *arg)
-{
-    mmtk_unreachable();
-}
-
-// added for MMTk integration
-
-JL_DLLEXPORT void jl_gc_array_ptr_copy(jl_array_t *dest, void **dest_p, jl_array_t *src, void **src_p, ssize_t n) JL_NOTSAFEPOINT
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    mmtk_memory_region_copy(&ptls->mmtk_mutator, jl_array_owner(src), src_p, jl_array_owner(dest), dest_p, n);
-}
-
-// No inline write barrier -- only used for debugging
-JL_DLLEXPORT void jl_gc_wb1_noinline(const void *parent) JL_NOTSAFEPOINT
-{
-    jl_gc_wb_back(parent);
-}
-
-JL_DLLEXPORT void jl_gc_wb2_noinline(const void *parent, const void *ptr) JL_NOTSAFEPOINT
-{
-    jl_gc_wb(parent, ptr);
-}
-
-JL_DLLEXPORT void jl_gc_wb1_slow(const void *parent) JL_NOTSAFEPOINT
-{
-    jl_task_t *ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, (const void*) 0);
-}
-
-JL_DLLEXPORT void jl_gc_wb2_slow(const void *parent, const void* ptr) JL_NOTSAFEPOINT
-{
-    jl_task_t *ct = jl_current_task;
-    jl_ptls_t ptls = ct->ptls;
-    mmtk_object_reference_write_slow(&ptls->mmtk_mutator, parent, ptr);
-}
-
-void *jl_gc_perm_alloc_nolock(size_t sz, int zero, unsigned align, unsigned offset)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    size_t allocsz = mmtk_align_alloc_sz(sz);
-    void* addr = mmtk_immortal_alloc_fast(&ptls->mmtk_mutator, allocsz, align, offset);
-    return addr;
-}
-
-void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
-{
-    return jl_gc_perm_alloc_nolock(sz, zero, align, offset);
-}
-
-void jl_gc_notify_image_load(const char* img_data, size_t len)
-{
-    mmtk_set_vm_space((void*)img_data, len);
-}
-
-void jl_gc_notify_image_alloc(char* img_data, size_t len)
-{
-    mmtk_immortal_region_post_alloc((void*)img_data, len);
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // MMTK_GC
diff --git a/src/module.c b/src/module.c
index bfe266ee424f5..f4da7e1e994de 100644
--- a/src/module.c
+++ b/src/module.c
@@ -12,6 +12,23 @@
 extern "C" {
 #endif
 
+// In this translation unit and this translation unit only emit this symbol `extern` for use by julia
+EXTERN_INLINE_DEFINE jl_binding_partition_t *jl_get_binding_partition(jl_binding_t *b, size_t world) JL_NOTSAFEPOINT;
+EXTERN_INLINE_DEFINE uint8_t jl_bpart_get_kind(jl_binding_partition_t *bpart) JL_NOTSAFEPOINT;
+extern inline enum jl_partition_kind decode_restriction_kind(jl_ptr_kind_union_t pku) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT jl_binding_partition_t *jl_get_globalref_partition(jl_globalref_t *gr, size_t world)
+{
+    if (!gr)
+        return NULL;
+    jl_binding_t *b = NULL;
+    if (gr)
+        b = gr->binding;
+    if (!b)
+        b = jl_get_module_binding(gr->mod, gr->name, 0);
+    return jl_get_binding_partition(b, world);
+}
+
 JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, uint8_t default_names)
 {
     jl_task_t *ct = jl_current_task;
@@ -35,6 +52,8 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui
     m->compile = -1;
     m->infer = -1;
     m->max_methods = -1;
+    m->file = name; // Using the name as a placeholder is better than nothing
+    m->line = 0;
     m->hash = parent == NULL ? bitmix(name->hash, jl_module_type->hash) :
         bitmix(name->hash, parent->hash);
     JL_MUTEX_INIT(&m->lock, "module->lock");
@@ -161,37 +180,51 @@ static jl_globalref_t *jl_new_globalref(jl_module_t *mod, jl_sym_t *name, jl_bin
     jl_task_t *ct = jl_current_task;
     jl_globalref_t *g = (jl_globalref_t*)jl_gc_alloc(ct->ptls, sizeof(jl_globalref_t), jl_globalref_type);
     g->mod = mod;
-    jl_gc_wb(g, g->mod);
+    jl_gc_wb_fresh(g, g->mod);
     g->name = name;
+    jl_gc_wb_fresh(g, g->name);
     g->binding = b;
+    jl_gc_wb_fresh(g, g->binding);
     return g;
 }
 
+static jl_binding_partition_t *new_binding_partition(void)
+{
+    jl_binding_partition_t *bpart = (jl_binding_partition_t*)jl_gc_alloc(jl_current_task->ptls, sizeof(jl_binding_partition_t), jl_binding_partition_type);
+    jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD));
+    bpart->min_world = 0;
+    jl_atomic_store_relaxed(&bpart->max_world, (size_t)-1);
+    jl_atomic_store_relaxed(&bpart->next, NULL);
+#ifdef _P64
+    bpart->reserved = 0;
+#endif
+    return bpart;
+}
+
 static jl_binding_t *new_binding(jl_module_t *mod, jl_sym_t *name)
 {
     jl_task_t *ct = jl_current_task;
     assert(jl_is_module(mod) && jl_is_symbol(name));
     jl_binding_t *b = (jl_binding_t*)jl_gc_alloc(ct->ptls, sizeof(jl_binding_t), jl_binding_type);
     jl_atomic_store_relaxed(&b->value, NULL);
-    jl_atomic_store_relaxed(&b->owner, NULL);
-    jl_atomic_store_relaxed(&b->ty, NULL);
+    jl_atomic_store_relaxed(&b->partitions, NULL);
     b->globalref = NULL;
-    b->constp = 0;
     b->exportp = 0;
     b->publicp = 0;
-    b->imported = 0;
     b->deprecated = 0;
-    b->usingfailed = 0;
-    b->padding = 0;
     JL_GC_PUSH1(&b);
     b->globalref = jl_new_globalref(mod, name, b);
+    jl_gc_wb(b, b->globalref);
+    jl_binding_partition_t *bpart = new_binding_partition();
+    jl_atomic_store_relaxed(&b->partitions, bpart);
+    jl_gc_wb(b, bpart);
     JL_GC_POP();
     return b;
 }
 
 extern jl_mutex_t jl_modules_mutex;
 
-static void check_safe_newbinding(jl_module_t *m, jl_sym_t *var)
+extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var)
 {
     if (jl_current_task->ptls->in_pure_callback)
         jl_errorf("new globals cannot be created in a generated function");
@@ -222,14 +255,21 @@ static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym
 JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 != b) {
-        if (b2 == NULL) {
-            check_safe_newbinding(m, var);
-            if (!alloc)
-                jl_errorf("Global %s.%s does not exist and cannot be assigned. Declare it using `global` before attempting assignment.", jl_symbol_name(m->name), jl_symbol_name(var));
-        }
-        if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) {
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+retry:
+    if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+            if (decode_restriction_kind(pku) != BINDING_KIND_DECLARED) {
+                check_safe_newbinding(m, var);
+                if (!alloc)
+                    jl_errorf("Global %s.%s does not exist and cannot be assigned. Declare it using `global` before attempting assignment.", jl_symbol_name(m->name), jl_symbol_name(var));
+            }
+            jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)jl_any_type, BINDING_KIND_GLOBAL);
+            if (!jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku))
+                goto retry;
+            jl_gc_wb_knownold(bpart, jl_any_type);
+        } else {
             jl_module_t *from = jl_binding_dbgmodule(b, m, var);
             if (from == m)
                 jl_errorf("cannot assign a value to imported variable %s.%s",
@@ -251,43 +291,88 @@ JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var
     return b->globalref->mod; // TODO: deprecate this?
 }
 
+JL_DLLEXPORT jl_value_t *jl_get_binding_value(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return NULL;
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return decode_restriction_value(pku);
+    return jl_atomic_load_relaxed(&b->value);
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_seqcst(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return NULL;
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return decode_restriction_value(pku);
+    return jl_atomic_load(&b->value);
+}
+
+JL_DLLEXPORT jl_value_t *jl_get_binding_value_if_const(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
+        return NULL;
+    if (!jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return NULL;
+    return decode_restriction_value(pku);
+}
+
+typedef struct _modstack_t {
+    jl_module_t *m;
+    jl_sym_t *var;
+    struct _modstack_t *prev;
+} modstack_t;
+static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st);
+
+JL_DLLEXPORT jl_value_t *jl_reresolve_binding_value_seqcst(jl_binding_t *b)
+{
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)))) {
+        jl_resolve_owner(b, b->globalref->mod, b->globalref->name, NULL);
+    }
+    return jl_get_binding_value_seqcst(b);
+}
+
 // get binding for adding a method
 // like jl_get_binding_wr, but has different error paths and messages
 JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 != b) {
-        if (b2 == NULL)
-            check_safe_newbinding(m, var);
-        if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) {
-            jl_value_t *f = jl_atomic_load_relaxed(&b2->value);
-            jl_module_t *from = jl_binding_dbgmodule(b, m, var);
-            if (f == NULL) {
-                // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from
-                jl_errorf("invalid method definition in %s: exported function %s.%s does not exist",
-                          jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+            if (decode_restriction_kind(pku) != BINDING_KIND_DECLARED) {
+                check_safe_newbinding(m, var);
             }
-            // TODO: we might want to require explicitly importing types to add constructors
-            //       or we might want to drop this error entirely
-            if (!b->imported && !(b2->constp && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) {
-                jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended",
-                          jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
-            }
-            return b2;
+            return b;
+        }
+        jl_value_t *f = jl_get_binding_value_if_const(b);
+        if (f == NULL) {
+            jl_module_t *from = jl_binding_dbgmodule(b, m, var);
+            // we must have implicitly imported this with using, so call jl_binding_dbgmodule to try to get the name of the module we got this from
+            jl_errorf("invalid method definition in %s: exported function %s.%s does not exist",
+                        jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
+        }
+        // TODO: we might want to require explicitly importing types to add constructors
+        //       or we might want to drop this error entirely
+        if (decode_restriction_kind(pku) != BINDING_KIND_IMPORTED && !(f && jl_is_type(f) && strcmp(jl_symbol_name(var), "=>") != 0)) {
+            jl_module_t *from = jl_binding_dbgmodule(b, m, var);
+            jl_errorf("invalid method definition in %s: function %s.%s must be explicitly imported to be extended",
+                        jl_symbol_name(m->name), jl_symbol_name(from->name), jl_symbol_name(var));
         }
+        return b;
     }
     return b;
 }
 
-typedef struct _modstack_t {
-    jl_module_t *m;
-    jl_sym_t *var;
-    struct _modstack_t *prev;
-} modstack_t;
-
-static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st);
-
 static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 
 #ifndef __clang_gcanalyzer__
@@ -298,23 +383,28 @@ static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROO
 }
 #endif
 
-static int eq_bindings(jl_binding_t *owner, jl_binding_t *alias)
+static int eq_bindings(jl_binding_partition_t *owner, jl_binding_t *alias, size_t world)
 {
-    assert(owner == jl_atomic_load_relaxed(&owner->owner));
-    if (owner == alias)
+    jl_ptr_kind_union_t owner_pku = jl_atomic_load_relaxed(&owner->restriction);
+    assert(decode_restriction_kind(owner_pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(owner_pku) == BINDING_KIND_DECLARED ||
+           jl_bkind_is_some_constant(decode_restriction_kind(owner_pku)));
+    jl_binding_partition_t *alias_bpart = jl_get_binding_partition(alias, world);
+    if (owner == alias_bpart)
         return 1;
-    alias = jl_atomic_load_relaxed(&alias->owner);
-    if (owner == alias)
+    jl_ptr_kind_union_t alias_pku = jl_walk_binding_inplace(&alias, &alias_bpart, world);
+    if (jl_bkind_is_some_constant(decode_restriction_kind(owner_pku)) &&
+        jl_bkind_is_some_constant(decode_restriction_kind(alias_pku)) &&
+        decode_restriction_value(owner_pku) &&
+        decode_restriction_value(alias_pku) == decode_restriction_value(owner_pku))
         return 1;
-    if (owner->constp && alias->constp && jl_atomic_load_relaxed(&owner->value) && jl_atomic_load_relaxed(&alias->value) == jl_atomic_load_relaxed(&owner->value))
-        return 1;
-    return 0;
+    return owner == alias_bpart;
 }
 
 // find a binding from a module's `usings` list
 static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, jl_module_t **from, modstack_t *st, int warn)
 {
     jl_binding_t *b = NULL;
+    jl_binding_partition_t *bpart = NULL;
     jl_module_t *owner = NULL;
     JL_LOCK(&m->lock);
     int i = (int)m->usings.len - 1;
@@ -329,13 +419,17 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
             if (tempb == NULL)
                 // couldn't resolve; try next using (see issue #6105)
                 continue;
-            assert(jl_atomic_load_relaxed(&tempb->owner) == tempb);
-            if (b != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempb, b)) {
+            jl_binding_partition_t *tempbpart = jl_get_binding_partition(tempb, jl_current_task->world_age);
+            jl_ptr_kind_union_t tempb_pku = jl_atomic_load_relaxed(&tempbpart->restriction);
+            assert(decode_restriction_kind(tempb_pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(tempb_pku) == BINDING_KIND_DECLARED || jl_bkind_is_some_constant(decode_restriction_kind(tempb_pku)));
+            (void)tempb_pku;
+            if (bpart != NULL && !tempb->deprecated && !b->deprecated && !eq_bindings(tempbpart, b, jl_current_task->world_age)) {
                 if (warn) {
                     // set usingfailed=1 to avoid repeating this warning
                     // the owner will still be NULL, so it can be later imported or defined
                     tempb = jl_get_module_binding(m, var, 1);
-                    tempb->usingfailed = 1;
+                    tempbpart = jl_get_binding_partition(tempb, jl_current_task->world_age);
+                    jl_atomic_store_release(&tempbpart->restriction, encode_restriction(NULL, BINDING_KIND_FAILED));
                     jl_printf(JL_STDERR,
                               "WARNING: both %s and %s export \"%s\"; uses of it in module %s must be qualified\n",
                               jl_symbol_name(owner->name),
@@ -347,6 +441,7 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
             if (owner == NULL || !tempb->deprecated) {
                 owner = imp;
                 b = tempb;
+                bpart = tempbpart;
             }
         }
     }
@@ -358,13 +453,14 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
 // this might not be the same as the owner of the binding, since the binding itself may itself have been imported from elsewhere
 static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var)
 {
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 != b && !b->imported) {
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) != BINDING_KIND_GLOBAL) {
         // for implicitly imported globals, try to re-resolve it to find the module we got it from most directly
         jl_module_t *from = NULL;
-        b = using_resolve_binding(m, var, &from, NULL, 0);
-        if (b) {
-            if (b2 == NULL || jl_atomic_load_relaxed(&b->owner) == jl_atomic_load_relaxed(&b2->owner))
+        jl_binding_t *b2 = using_resolve_binding(m, var, &from, NULL, 0);
+        if (b2) {
+            jl_binding_partition_t *b2part = jl_get_binding_partition(b2, jl_current_task->world_age);
+            if (eq_bindings(b2part, b, jl_current_task->world_age))
                 return from;
             // if we did not find it (or accidentally found a different one), ignore this
         }
@@ -379,10 +475,16 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *
 {
     if (b == NULL)
         b = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
-    if (b2 == NULL) {
-        if (b->usingfailed)
-            return NULL;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+retry:
+    if (decode_restriction_kind(pku) == BINDING_KIND_FAILED)
+        return NULL;
+    if (decode_restriction_kind(pku) == BINDING_KIND_DECLARED) {
+        return b;
+    }
+    if (decode_restriction_kind(pku) == BINDING_KIND_GUARD) {
+        jl_binding_t *b2 = NULL;
         modstack_t top = { m, var, st };
         modstack_t *tmp = st;
         for (; tmp != NULL; tmp = tmp->prev) {
@@ -397,19 +499,17 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *
             return NULL;
         assert(from);
         JL_GC_PROMISE_ROOTED(from); // gc-analysis does not understand output parameters
+        JL_GC_PROMISE_ROOTED(b2);
         if (b2->deprecated) {
-            if (jl_atomic_load_relaxed(&b2->value) == jl_nothing) {
+            if (jl_get_binding_value(b2) == jl_nothing) {
                 // silently skip importing deprecated values assigned to nothing (to allow later mutation)
                 return NULL;
             }
         }
         // do a full import to prevent the result of this lookup from
         // changing, for example if this var is assigned to later.
-        jl_binding_t *owner = NULL;
-        if (!jl_atomic_cmpswap(&b->owner, &owner, b2)) {
-            // concurrent import
-            return owner;
-        }
+        if (!jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction((jl_value_t*)b2, BINDING_KIND_IMPLICIT)))
+            goto retry;
         if (b2->deprecated) {
             b->deprecated = 1; // we will warn about this below, but we might want to warn at the use sites too
             if (m != jl_main_module && m != jl_base_module &&
@@ -424,20 +524,26 @@ static jl_binding_t *jl_resolve_owner(jl_binding_t *b/*optional*/, jl_module_t *
                 jl_binding_dep_message(from, var, b2);
             }
         }
+        return b2;
     }
-    assert(jl_atomic_load_relaxed(&b2->owner) == b2);
-    return b2;
+    jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    return b;
 }
 
 // get the current likely owner of binding when accessing m.var, without resolving the binding (it may change later)
 JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var)
 {
-    jl_binding_t *b = jl_get_module_binding(m, var, 0);
+    jl_binding_t *b = jl_get_module_binding(m, var, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
     jl_module_t *from = m;
-    if (b == NULL || (!b->usingfailed && jl_atomic_load_relaxed(&b->owner) == NULL))
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (decode_restriction_kind(pku) == BINDING_KIND_GUARD) {
         b = using_resolve_binding(m, var, &from, NULL, 0);
-    else
-        b = jl_atomic_load_relaxed(&b->owner);
+        bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    }
+    pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL && !jl_bkind_is_some_constant(decode_restriction_kind(pku)))
+        return NULL;
     return b;
 }
 
@@ -445,13 +551,20 @@ JL_DLLEXPORT jl_binding_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var)
 JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
     if (b == NULL)
         return jl_nothing;
-    b = jl_atomic_load_relaxed(&b->owner);
-    if (b == NULL)
+    jl_ptr_kind_union_t pku = jl_walk_binding_inplace(&b, &bpart, jl_current_task->world_age);
+    if (jl_bkind_is_some_guard(decode_restriction_kind(pku)))
         return jl_nothing;
-    jl_value_t *ty = jl_atomic_load_relaxed(&b->ty);
-    return ty ? ty : jl_nothing;
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        // TODO: We would like to return the type of the constant, but
+        // currently code relies on this returning any to bypass conversion
+        // before an attempted assignment to a constant.
+        // return jl_typeof(jl_atomic_load_relaxed(&bpart->restriction));
+        return (jl_value_t*)jl_any_type;
+    }
+    return decode_restriction_value(pku);
 }
 
 JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m, jl_sym_t *var)
@@ -482,7 +595,8 @@ JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var)
 JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && b->imported;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_IMPORTED;
 }
 
 extern const char *jl_filename;
@@ -501,7 +615,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
     jl_binding_t *dep_message_binding = jl_get_binding(m, jl_symbol(dep_binding_name));
     jl_value_t *dep_message = NULL;
     if (dep_message_binding != NULL)
-        dep_message = jl_atomic_load_relaxed(&dep_message_binding->value);
+        dep_message = jl_get_binding_value(dep_message_binding);
     JL_GC_PUSH1(&dep_message);
     if (dep_message != NULL) {
         if (jl_is_string(dep_message)) {
@@ -512,7 +626,7 @@ static void jl_binding_dep_message(jl_module_t *m, jl_sym_t *name, jl_binding_t
         }
     }
     else {
-        jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+        jl_value_t *v = jl_get_binding_value(b);
         dep_message = v; // use as gc-root
         if (v) {
             if (jl_is_type(v) || jl_is_module(v)) {
@@ -549,9 +663,12 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname,
                   jl_symbol_name(to->name));
     }
     else {
-        assert(jl_atomic_load_relaxed(&b->owner) == b);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+        assert(decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(pku) == BINDING_KIND_DECLARED || jl_bkind_is_some_constant(decode_restriction_kind(pku)));
+        (void)pku;
         if (b->deprecated) {
-            if (jl_atomic_load_relaxed(&b->value) == jl_nothing) {
+            if (jl_get_binding_value(b) == jl_nothing) {
                 // silently skip importing deprecated values assigned to nothing (to allow later mutation)
                 return;
             }
@@ -575,17 +692,28 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *asname,
             // importing a binding on top of itself. harmless.
             return;
         }
-        jl_binding_t *ownerto = NULL;
-        if (jl_atomic_cmpswap(&bto->owner, &ownerto, b)) {
-            bto->imported |= (explici != 0);
+        jl_binding_partition_t *btopart = jl_get_binding_partition(bto, jl_current_task->world_age);
+        jl_ptr_kind_union_t bto_pku = jl_atomic_load_relaxed(&btopart->restriction);
+retry:
+        if (decode_restriction_kind(bto_pku) == BINDING_KIND_GUARD ||
+            decode_restriction_kind(bto_pku) == BINDING_KIND_IMPLICIT ||
+            decode_restriction_kind(bto_pku) == BINDING_KIND_FAILED) {
+
+            jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)b, (explici != 0) ? BINDING_KIND_IMPORTED : BINDING_KIND_EXPLICIT);
+            if (!jl_atomic_cmpswap(&btopart->restriction, &bto_pku, new_pku))
+                goto retry;
             bto->deprecated |= b->deprecated; // we already warned about this above, but we might want to warn at the use sites too
         }
         else {
-            if (eq_bindings(b, bto)) {
-                // already imported
-                bto->imported |= (explici != 0);
+            if (eq_bindings(bpart, bto, jl_current_task->world_age)) {
+                // already imported - potentially upgrade to _IMPORTED or _EXPLICIT
+                if (jl_bkind_is_some_import(decode_restriction_kind(bto_pku))) {
+                    jl_ptr_kind_union_t new_pku = encode_restriction(decode_restriction_value(bto_pku), (explici != 0) ? BINDING_KIND_IMPORTED : BINDING_KIND_EXPLICIT);
+                    if (!jl_atomic_cmpswap(&btopart->restriction, &bto_pku, new_pku))
+                        goto retry;
+                }
             }
-            else if (ownerto != bto) {
+            else if (jl_bkind_is_some_import(decode_restriction_kind(bto_pku))) {
                 // already imported from somewhere else
                 jl_printf(JL_STDERR,
                           "WARNING: ignoring conflicting import of %s.%s into %s\n",
@@ -647,18 +775,24 @@ JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from)
         jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
         if ((void*)b == jl_nothing)
             break;
-        if (b->exportp && (jl_atomic_load_relaxed(&b->owner) == b || b->imported)) {
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+        if (b->exportp && (decode_restriction_kind(pku) == BINDING_KIND_GLOBAL || decode_restriction_kind(pku) == BINDING_KIND_IMPORTED)) {
             jl_sym_t *var = b->globalref->name;
             jl_binding_t *tob = jl_get_module_binding(to, var, 0);
-            if (tob && jl_atomic_load_relaxed(&tob->owner) != NULL &&
-                // don't warn for conflicts with the module name itself.
-                // see issue #4715
-                var != to->name &&
-                !eq_bindings(jl_atomic_load_relaxed(&tob->owner), b)) {
-                jl_printf(JL_STDERR,
-                          "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n",
-                          jl_symbol_name(from->name), jl_symbol_name(var),
-                          jl_symbol_name(to->name));
+            if (tob) {
+                jl_binding_partition_t *tobpart = jl_get_binding_partition(tob, jl_current_task->world_age);
+                jl_ptr_kind_union_t tobpku = jl_walk_binding_inplace(&tob, &tobpart, jl_current_task->world_age);
+                if (tob && decode_restriction_kind(tobpku) != BINDING_KIND_GUARD &&
+                    // don't warn for conflicts with the module name itself.
+                    // see issue #4715
+                    var != to->name &&
+                    !eq_bindings(tobpart, b, jl_current_task->world_age)) {
+                    jl_printf(JL_STDERR,
+                            "WARNING: using %s.%s in module %s conflicts with an existing identifier.\n",
+                            jl_symbol_name(from->name), jl_symbol_name(var),
+                            jl_symbol_name(to->name));
+                }
             }
         }
         table = jl_atomic_load_relaxed(&from->bindings);
@@ -683,14 +817,23 @@ JL_DLLEXPORT void jl_module_public(jl_module_t *from, jl_sym_t *s, int exported)
 
 JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var, int allow_import) // unlike most queries here, this is currently seq_cst
 {
-    jl_binding_t *b = allow_import ? jl_get_binding(m, var) : jl_get_module_binding(m, var, 0);
-    return b && (jl_atomic_load_relaxed(&b->owner) == b) && (jl_atomic_load(&b->value) != NULL);
+    jl_binding_t *b = jl_get_module_binding(m, var, allow_import);
+    if (!b)
+        return 0;
+    if (!allow_import) {
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        if (!bpart || jl_bkind_is_some_import(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction))))
+            return 0;
+        return jl_get_binding_value(b) != NULL;
+    }
+    return jl_reresolve_binding_value_seqcst(b) != NULL;
 }
 
 JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && (b->exportp || jl_atomic_load_relaxed(&b->owner) == b);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && (b->exportp || decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_GLOBAL);
 }
 
 JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var)
@@ -708,10 +851,14 @@ JL_DLLEXPORT int jl_module_public_p(jl_module_t *m, jl_sym_t *var)
 JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 0);
-    return b && jl_atomic_load_relaxed(&b->owner) != NULL;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (!bpart)
+        return 0;
+    enum jl_partition_kind kind = decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction));
+    return kind == BINDING_KIND_DECLARED || !jl_bkind_is_some_guard(kind);
 }
 
-static uint_t bindingkey_hash(size_t idx, jl_value_t *data)
+uint_t bindingkey_hash(size_t idx, jl_value_t *data)
 {
     jl_binding_t *b = (jl_binding_t*)jl_svecref(data, idx); // This must always happen inside the lock
     jl_sym_t *var = b->globalref->name;
@@ -736,6 +883,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m, jl_sym_t *var,
         ssize_t idx = jl_smallintset_lookup(bindingkeyset, bindingkey_eq, var, (jl_value_t*)bindings, hv, 0); // acquire
         if (idx != -1) {
             jl_binding_t *b = (jl_binding_t*)jl_svecref(bindings, idx); // relaxed
+            JL_GC_PROMISE_ROOTED(b);
             if (locked)
                 JL_UNLOCK(&m->lock);
             return b;
@@ -780,7 +928,7 @@ JL_DLLEXPORT jl_value_t *jl_get_globalref_value(jl_globalref_t *gr)
     jl_binding_t *b = gr->binding;
     b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
     // ignores b->deprecated
-    return b == NULL ? NULL : jl_atomic_load_relaxed(&b->value);
+    return b == NULL ? NULL : jl_get_binding_value(b);
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
@@ -791,7 +939,7 @@ JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
     // XXX: this only considers if the original is deprecated, not the binding in m
     if (b->deprecated)
         jl_binding_deprecation_warning(m, var, b);
-    return jl_atomic_load_relaxed(&b->value);
+    return jl_get_binding_value(b);
 }
 
 JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
@@ -804,43 +952,33 @@ JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var
 {
     // this function is mostly only used during initialization, so the data races here are not too important to us
     jl_binding_t *bp = jl_get_module_binding(m, var, 1);
-    jl_binding_t *b2 = NULL;
-    if (!jl_atomic_cmpswap(&bp->owner, &b2, bp) && b2 != bp)
-        jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var));
-    if (jl_atomic_load_relaxed(&bp->value) == NULL) {
-        jl_value_t *old_ty = NULL;
-        jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type);
-        uint8_t constp = 0;
-        // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) {
-        if (constp = bp->constp, bp->constp = 1, constp == 0) {
-            jl_value_t *old = NULL;
-            if (jl_atomic_cmpswap(&bp->value, &old, val)) {
-                jl_gc_wb(bp, val);
-                return;
-            }
-        }
-    }
-    jl_errorf("invalid redefinition of constant %s", jl_symbol_name(var));
+    jl_binding_partition_t *bpart = jl_get_binding_partition(bp, jl_current_task->world_age);
+    jl_atomic_store_release(&bpart->restriction, encode_restriction(val, BINDING_KIND_CONST));
+    jl_gc_wb(bpart, val);
 }
 
 JL_DLLEXPORT int jl_globalref_is_const(jl_globalref_t *gr)
 {
     jl_binding_t *b = gr->binding;
     b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
-    return b && b->constp;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    if (!bpart)
+        return 0;
+    return jl_bkind_is_some_constant(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)));
 }
 
 JL_DLLEXPORT int jl_globalref_boundp(jl_globalref_t *gr)
 {
     jl_binding_t *b = gr->binding;
     b = jl_resolve_owner(b, gr->mod, gr->name, NULL);
-    return b && jl_atomic_load_relaxed(&b->value) != NULL;
+    return b && jl_get_binding_value(b) != NULL;
 }
 
 JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var)
 {
     jl_binding_t *b = jl_get_binding(m, var);
-    return b && b->constp;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    return b && jl_bkind_is_some_constant(decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)));
 }
 
 // set the deprecated flag for a binding:
@@ -870,7 +1008,6 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b
     if (b->deprecated == 1 && jl_options.depwarn) {
         if (jl_options.depwarn != JL_OPTIONS_DEPWARN_ERROR)
             jl_printf(JL_STDERR, "WARNING: ");
-        assert(jl_atomic_load_relaxed(&b->owner) == b);
         jl_printf(JL_STDERR, "%s.%s is deprecated",
                   jl_symbol_name(m->name), jl_symbol_name(s));
         jl_binding_dep_message(m, s, b);
@@ -889,39 +1026,29 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_sym_t *s, jl_binding_t *b
     }
 }
 
-jl_value_t *jl_check_binding_wr(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, int reassign)
+jl_value_t *jl_check_binding_wr(jl_binding_t *b JL_PROPAGATES_ROOT, jl_module_t *mod, jl_sym_t *var, jl_value_t *rhs JL_MAYBE_UNROOTED, int reassign)
 {
-    jl_value_t *old_ty = NULL;
-    if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type)) {
-        if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) {
-            JL_GC_PUSH1(&rhs); // callee-rooted
-            if (!jl_isa(rhs, old_ty))
-                jl_errorf("cannot assign an incompatible value to the global %s.%s.",
-                          jl_symbol_name(mod->name), jl_symbol_name(var));
-            JL_GC_POP();
-        }
-    }
-    else {
-        old_ty = (jl_value_t*)jl_any_type;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku)));
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+        jl_value_t *old = decode_restriction_value(pku);
+        if (jl_egal(rhs, old))
+            return NULL;
+        if (jl_typeof(rhs) == jl_typeof(old))
+            jl_errorf("invalid redefinition of constant %s.%s. This redefinition may be permitted using the `const` keyword.",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+        else
+            jl_errorf("invalid redefinition of constant %s.%s.",
+                jl_symbol_name(mod->name), jl_symbol_name(var));
     }
-    if (b->constp) {
-        if (reassign) {
-            jl_value_t *old = NULL;
-            if (jl_atomic_cmpswap(&b->value, &old, rhs)) {
-                jl_gc_wb(b, rhs);
-                return NULL;
-            }
-            if (jl_egal(rhs, old))
-                return NULL;
-            if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs))
-                reassign = 0;
-            else
-                jl_safe_printf("WARNING: redefinition of constant %s.%s. This may fail, cause incorrect answers, or produce other errors.\n",
-                               jl_symbol_name(mod->name), jl_symbol_name(var));
-        }
-        if (!reassign)
-            jl_errorf("invalid redefinition of constant %s.%s",
-                      jl_symbol_name(mod->name), jl_symbol_name(var));
+    jl_value_t *old_ty = decode_restriction_value(pku);
+    if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) {
+        JL_GC_PUSH1(&rhs); // callee-rooted
+        if (!jl_isa(rhs, old_ty))
+            jl_errorf("cannot assign an incompatible value to the global %s.%s.",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+        JL_GC_POP();
     }
     return old_ty;
 }
@@ -952,12 +1079,13 @@ JL_DLLEXPORT jl_value_t *jl_checked_replace(jl_binding_t *b, jl_module_t *mod, j
 
 JL_DLLEXPORT jl_value_t *jl_checked_modify(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *op, jl_value_t *rhs)
 {
-    jl_value_t *ty = NULL;
-    if (jl_atomic_cmpswap_relaxed(&b->ty, &ty, (jl_value_t*)jl_any_type))
-        ty = (jl_value_t*)jl_any_type;
-    if (b->constp)
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    assert(!jl_bkind_is_some_guard(decode_restriction_kind(pku)) && !jl_bkind_is_some_import(decode_restriction_kind(pku)));
+    if (jl_bkind_is_some_constant(decode_restriction_kind(pku)))
         jl_errorf("invalid redefinition of constant %s.%s",
                   jl_symbol_name(mod->name), jl_symbol_name(var));
+    jl_value_t *ty = decode_restriction_value(pku);
     return modify_value(ty, &b->value, (jl_value_t*)b, op, rhs, 1, mod, var);
 }
 
@@ -970,16 +1098,6 @@ JL_DLLEXPORT jl_value_t *jl_checked_assignonce(jl_binding_t *b, jl_module_t *mod
     return old;
 }
 
-JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var)
-{
-    // n.b. jl_get_binding_wr should have ensured b->owner == b as mod.var
-    if (jl_atomic_load_relaxed(&b->owner) != b || (jl_atomic_load_relaxed(&b->value) != NULL && !b->constp)) {
-        jl_errorf("cannot declare %s.%s constant; it already has a value",
-                  jl_symbol_name(mod->name), jl_symbol_name(var));
-    }
-    b->constp = 1;
-}
-
 JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m)
 {
     JL_LOCK(&m->lock);
@@ -996,11 +1114,6 @@ JL_DLLEXPORT jl_value_t *jl_module_usings(jl_module_t *m)
     return (jl_value_t*)a;
 }
 
-uint8_t _binding_is_from_explicit_using(jl_binding_t *b) {
-    jl_binding_t *owner = jl_atomic_load_relaxed(&b->owner);
-    return (owner != NULL && owner != b && !b->imported);
-}
-
 void _append_symbol_to_bindings_array(jl_array_t* a, jl_sym_t *name) {
     jl_array_grow_end(a, 1);
     //XXX: change to jl_arrayset if array storage allocation for Array{Symbols,1} changes:
@@ -1017,10 +1130,12 @@ void append_module_names(jl_array_t* a, jl_module_t *m, int all, int imported, i
         jl_sym_t *asname = b->globalref->name;
         int hidden = jl_symbol_name(asname)[0]=='#';
         int main_public = (m == jl_main_module && !(asname == jl_eval_sym || asname == jl_include_sym));
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        enum jl_partition_kind kind = decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction));
         if (((b->publicp) ||
-             (imported && b->imported) ||
-             (usings && _binding_is_from_explicit_using(b)) ||
-             (jl_atomic_load_relaxed(&b->owner) == b && !b->imported && (all || main_public))) &&
+             (imported && (kind == BINDING_KIND_CONST_IMPORT || kind == BINDING_KIND_IMPORTED)) ||
+             (usings && kind == BINDING_KIND_EXPLICIT) ||
+             ((kind == BINDING_KIND_GLOBAL || kind == BINDING_KIND_CONST || kind == BINDING_KIND_DECLARED) && (all || main_public))) &&
             (all || (!b->deprecated && !hidden)))
             _append_symbol_to_bindings_array(a, asname);
     }
@@ -1066,6 +1181,14 @@ jl_module_t *jl_module_root(jl_module_t *m)
     }
 }
 
+JL_DLLEXPORT jl_sym_t *jl_module_getloc(jl_module_t *m, int32_t *line)
+{
+    if (line) {
+        *line = m->line;
+    }
+    return m->file;
+}
+
 JL_DLLEXPORT jl_uuid_t jl_module_build_id(jl_module_t *m) { return m->build_id; }
 JL_DLLEXPORT jl_uuid_t jl_module_uuid(jl_module_t* m) { return m->uuid; }
 
@@ -1095,8 +1218,10 @@ JL_DLLEXPORT void jl_clear_implicit_imports(jl_module_t *m)
         jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
         if ((void*)b == jl_nothing)
             break;
-        if (jl_atomic_load_relaxed(&b->owner) && jl_atomic_load_relaxed(&b->owner) != b && !b->imported)
-            jl_atomic_store_relaxed(&b->owner, NULL);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+        if (decode_restriction_kind(jl_atomic_load_relaxed(&bpart->restriction)) == BINDING_KIND_IMPLICIT) {
+            jl_atomic_store_relaxed(&bpart->restriction, encode_restriction(NULL, BINDING_KIND_GUARD));
+        }
     }
     JL_UNLOCK(&m->lock);
 }
diff --git a/src/options.h b/src/options.h
index 58a7868093175..800be866183b0 100644
--- a/src/options.h
+++ b/src/options.h
@@ -68,8 +68,8 @@
 // GC_FINAL_STATS prints total GC stats at exit
 // #define GC_FINAL_STATS
 
-// MEMPROFILE prints pool summary statistics after every GC
-//#define MEMPROFILE
+// MEMPROFILE prints pool and large objects summary statistics after every GC
+// #define MEMPROFILE
 
 // GC_TIME prints time taken by each phase of GC
 // #define GC_TIME
@@ -110,7 +110,7 @@
 #if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
 #define JL_STACK_SIZE (64*1024*1024)
 #elif defined(_P64)
-#define JL_STACK_SIZE (4*1024*1024)
+#define JL_STACK_SIZE (8*1024*1024)
 #else
 #define JL_STACK_SIZE (2*1024*1024)
 #endif
diff --git a/src/pipeline.cpp b/src/pipeline.cpp
index 7704336045ad9..236be179e12c9 100644
--- a/src/pipeline.cpp
+++ b/src/pipeline.cpp
@@ -19,18 +19,6 @@
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/PassManager.h>
 #include <llvm/IR/Verifier.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Vectorize.h>
-#include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
-#include <llvm/Transforms/Instrumentation/ThreadSanitizer.h>
-#include <llvm/Transforms/Scalar/GVN.h>
-#include <llvm/Transforms/IPO/AlwaysInliner.h>
-#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
-#include <llvm/Transforms/InstCombine/InstCombine.h>
-#include <llvm/Transforms/Scalar/InstSimplifyPass.h>
-#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
 #include <llvm/Passes/PassBuilder.h>
 #include <llvm/Passes/PassPlugin.h>
 
@@ -40,6 +28,7 @@
 #include <llvm/Transforms/IPO/ConstantMerge.h>
 #include <llvm/Transforms/IPO/ForceFunctionAttrs.h>
 #include <llvm/Transforms/IPO/GlobalDCE.h>
+#include <llvm/Transforms/IPO/StripDeadPrototypes.h>
 #include <llvm/Transforms/InstCombine/InstCombine.h>
 #include <llvm/Transforms/Instrumentation/AddressSanitizer.h>
 #include <llvm/Transforms/Instrumentation/MemorySanitizer.h>
@@ -76,6 +65,8 @@
 #include <llvm/Transforms/Scalar/SimplifyCFG.h>
 #include <llvm/Transforms/Scalar/WarnMissedTransforms.h>
 #include <llvm/Transforms/Utils/InjectTLIMappings.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
+#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
 #include <llvm/Transforms/Vectorize/LoopVectorize.h>
 #include <llvm/Transforms/Vectorize/SLPVectorizer.h>
 #include <llvm/Transforms/Vectorize/VectorCombine.h>
@@ -609,7 +600,8 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL
         if (O.getSpeedupLevel() >= 2) {
             buildVectorPipeline(FPM, PB, O, options);
         }
-        FPM.addPass(WarnMissedTransformationsPass());
+        if (options.warn_missed_transformations)
+            FPM.addPass(WarnMissedTransformationsPass());
         MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
     }
     buildIntrinsicLoweringPipeline(MPM, PB, O, options);
@@ -617,63 +609,6 @@ static void buildPipeline(ModulePassManager &MPM, PassBuilder *PB, OptimizationL
     MPM.addPass(AfterOptimizationMarkerPass());
 }
 
-struct PipelineConfig {
-    int Speedup;
-    int Size;
-    int lower_intrinsics;
-    int dump_native;
-    int external_use;
-    int llvm_only;
-    int always_inline;
-    int enable_early_simplifications;
-    int enable_early_optimizations;
-    int enable_scalar_optimizations;
-    int enable_loop_optimizations;
-    int enable_vector_pipeline;
-    int remove_ni;
-    int cleanup;
-};
-
-extern "C" JL_DLLEXPORT_CODEGEN void jl_build_newpm_pipeline_impl(void *MPM, void *PB, PipelineConfig* config) JL_NOTSAFEPOINT
-{
-    OptimizationLevel O;
-    switch (config->Size) {
-        case 1:
-            O = OptimizationLevel::Os;
-            break;
-        default:
-            O = OptimizationLevel::Oz;
-            break;
-        case 0:
-            switch (config->Speedup) {
-                case 0:
-                    O = OptimizationLevel::O0;
-                    break;
-                case 1:
-                    O = OptimizationLevel::O1;
-                    break;
-                case 2:
-                    O = OptimizationLevel::O2;
-                    break;
-                default:
-                    O = OptimizationLevel::O3;
-                    break;
-            }
-    }
-    buildPipeline(*reinterpret_cast<ModulePassManager*>(MPM), reinterpret_cast<PassBuilder*>(PB), O,
-                    OptimizationOptions{!!config->lower_intrinsics,
-                                        !!config->dump_native,
-                                        !!config->external_use,
-                                        !!config->llvm_only,
-                                        !!config->always_inline,
-                                        !!config->enable_early_simplifications,
-                                        !!config->enable_early_optimizations,
-                                        !!config->enable_scalar_optimizations,
-                                        !!config->enable_loop_optimizations,
-                                        !!config->enable_vector_pipeline,
-                                        !!config->remove_ni,
-                                        !!config->cleanup});
-}
 
 #undef JULIA_PASS
 
@@ -870,7 +805,16 @@ static Optional<std::pair<OptimizationLevel, OptimizationOptions>> parseJuliaPip
             OPTION(lower_intrinsics),
             OPTION(dump_native),
             OPTION(external_use),
-            OPTION(llvm_only)
+            OPTION(llvm_only),
+            OPTION(always_inline),
+            OPTION(enable_early_simplifications),
+            OPTION(enable_early_optimizations),
+            OPTION(enable_scalar_optimizations),
+            OPTION(enable_loop_optimizations),
+            OPTION(enable_vector_pipeline),
+            OPTION(remove_ni),
+            OPTION(cleanup),
+            OPTION(warn_missed_transformations)
 #undef OPTION
         };
         while (!name.empty()) {
diff --git a/src/precompile.c b/src/precompile.c
index c40e867ea699e..c21cf5367fba6 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -39,9 +39,17 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
         static jl_value_t *replace_depot_func = NULL;
         if (!replace_depot_func)
             replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path"));
+        static jl_value_t *normalize_depots_func = NULL;
+        if (!normalize_depots_func)
+            normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation"));
         ios_t srctext;
-        jl_value_t *deptuple = NULL;
-        JL_GC_PUSH2(&deptuple, &udeps);
+        jl_value_t *deptuple = NULL, *depots = NULL;
+        JL_GC_PUSH3(&deptuple, &udeps, &depots);
+        jl_task_t *ct = jl_current_task;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        depots = jl_apply(&normalize_depots_func, 1);
+        ct->world_age = last_age;
         for (size_t i = 0; i < len; i++) {
             deptuple = jl_array_ptr_ref(udeps, i);
             jl_value_t *depmod = jl_fieldref(deptuple, 0);  // module
@@ -60,13 +68,14 @@ void write_srctext(ios_t *f, jl_array_t *udeps, int64_t srctextpos) {
                 }
 
                 jl_value_t **replace_depot_args;
-                JL_GC_PUSHARGS(replace_depot_args, 2);
+                JL_GC_PUSHARGS(replace_depot_args, 3);
                 replace_depot_args[0] = replace_depot_func;
                 replace_depot_args[1] = abspath;
+                replace_depot_args[2] = depots;
                 jl_task_t *ct = jl_current_task;
                 size_t last_age = ct->world_age;
                 ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-                jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 2);
+                jl_value_t *depalias = (jl_value_t*)jl_apply(replace_depot_args, 3);
                 ct->world_age = last_age;
                 JL_GC_POP();
 
@@ -116,14 +125,16 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
         if (f) {
             jl_array_ptr_1d_push(jl_module_init_order, m);
             int setting = jl_get_module_compile((jl_module_t*)m);
-            if (setting != JL_OPTIONS_COMPILE_OFF &&
-                setting != JL_OPTIONS_COMPILE_MIN) {
+            if ((setting != JL_OPTIONS_COMPILE_OFF && (jl_options.trim ||
+                (setting != JL_OPTIONS_COMPILE_MIN)))) {
                 // TODO: this would be better handled if moved entirely to jl_precompile
                 // since it's a slightly duplication of effort
                 jl_value_t *tt = jl_is_type(f) ? (jl_value_t*)jl_wrap_Type(f) : jl_typeof(f);
                 JL_GC_PUSH1(&tt);
                 tt = jl_apply_tuple_type_v(&tt, 1);
                 jl_compile_hint((jl_tupletype_t*)tt);
+                if (jl_options.trim)
+                    jl_add_entrypoint((jl_tupletype_t*)tt);
                 JL_GC_POP();
             }
         }
@@ -188,6 +199,10 @@ JL_DLLEXPORT void jl_write_compiler_output(void)
             jl_printf(JL_STDERR, "\n  ** incremental compilation may be broken for this module **\n\n");
         }
     }
+    if (jl_options.trim) {
+        exit(0); // Some finalizers need to run and we've blown up the bindings table
+        // TODO: Is this still needed
+    }
     JL_GC_POP();
     jl_gc_enable_finalizers(ct, 1);
 }
diff --git a/src/precompile_utils.c b/src/precompile_utils.c
index 5a4f599d1f0eb..a78d1e66dbb51 100644
--- a/src/precompile_utils.c
+++ b/src/precompile_utils.c
@@ -321,3 +321,83 @@ static void *jl_precompile_worklist(jl_array_t *worklist, jl_array_t *extext_met
     JL_GC_POP();
     return native_code;
 }
+
+static int enq_ccallable_entrypoints_(jl_typemap_entry_t *def, void *closure)
+{
+    jl_method_t *m = def->func.method;
+    if (m->external_mt)
+        return 1;
+    if (m->ccallable)
+        jl_add_entrypoint((jl_tupletype_t*)jl_svecref(m->ccallable, 1));
+    return 1;
+}
+
+static int enq_ccallable_entrypoints(jl_methtable_t *mt, void *env)
+{
+    return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), enq_ccallable_entrypoints_, env);
+}
+
+JL_DLLEXPORT void jl_add_ccallable_entrypoints(void)
+{
+    jl_foreach_reachable_mtable(enq_ccallable_entrypoints, NULL);
+}
+
+static void *jl_precompile_trimmed(size_t world)
+{
+    // array of MethodInstances and ccallable aliases to include in the output
+    jl_array_t *m = jl_alloc_vec_any(0);
+    jl_value_t *ccallable = NULL;
+    JL_GC_PUSH2(&m, &ccallable);
+    jl_method_instance_t *mi;
+    while (1)
+    {
+        mi = (jl_method_instance_t*)arraylist_pop(jl_entrypoint_mis);
+        if (mi == NULL)
+            break;
+        assert(jl_is_method_instance(mi));
+
+        jl_array_ptr_1d_push(m, (jl_value_t*)mi);
+        ccallable = (jl_value_t *)mi->def.method->ccallable;
+        if (ccallable)
+            jl_array_ptr_1d_push(m, ccallable);
+    }
+
+    jl_cgparams_t params = jl_default_cgparams;
+    params.trim = jl_options.trim;
+    void *native_code = jl_create_native(m, NULL, &params, 0, /* imaging */ 1, 0,
+                                         world);
+    JL_GC_POP();
+    return native_code;
+}
+
+static void jl_rebuild_methtables(arraylist_t* MIs, htable_t* mtables)
+{
+    size_t i;
+    for (i = 0; i < MIs->len; i++) {
+        jl_method_instance_t *mi = (jl_method_instance_t*)MIs->items[i];
+        jl_method_t *m = mi->def.method;
+        jl_methtable_t *old_mt = jl_method_get_table(m);
+        if ((jl_value_t *)old_mt == jl_nothing)
+            continue;
+        jl_sym_t *name = old_mt->name;
+        if (!ptrhash_has(mtables, old_mt))
+            ptrhash_put(mtables, old_mt, jl_new_method_table(name, m->module));
+        jl_methtable_t *mt = (jl_methtable_t*)ptrhash_get(mtables, old_mt);
+        size_t world =  jl_atomic_load_acquire(&jl_world_counter);
+        jl_value_t * lookup = jl_methtable_lookup(mt, m->sig, world);
+        // Check if the method is already in the new table, if not then insert it there
+        if (lookup == jl_nothing || (jl_method_t*)lookup != m) {
+            //TODO: should this be a function like unsafe_insert_method?
+            size_t min_world = jl_atomic_load_relaxed(&m->primary_world);
+            size_t max_world = jl_atomic_load_relaxed(&m->deleted_world);
+            jl_atomic_store_relaxed(&m->primary_world, ~(size_t)0);
+            jl_atomic_store_relaxed(&m->deleted_world, 1);
+            jl_typemap_entry_t *newentry = jl_method_table_add(mt, m, NULL);
+            jl_atomic_store_relaxed(&m->primary_world, min_world);
+            jl_atomic_store_relaxed(&m->deleted_world, max_world);
+            jl_atomic_store_relaxed(&newentry->min_world, min_world);
+            jl_atomic_store_relaxed(&newentry->max_world, max_world);
+        }
+    }
+
+}
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index 0d9ed43a23a58..d28e527ed44e8 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -11,7 +11,7 @@
 
 // This nesting is required to allow compilation on musl
 #define USE_DYN_GETAUXVAL
-#if defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
+#if (defined(_OS_LINUX_) || defined(_OS_FREEBSD_)) && defined(_CPU_AARCH64_)
 #  undef USE_DYN_GETAUXVAL
 #  include <sys/auxv.h>
 #elif defined(__GLIBC_PREREQ)
@@ -737,7 +737,16 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
 #  define AT_HWCAP2 26
 #endif
 
-#if defined(USE_DYN_GETAUXVAL)
+#if defined(_OS_FREEBSD_)
+static inline unsigned long jl_getauxval(unsigned long type)
+{
+    unsigned long val;
+    if (elf_aux_info((int)type, &val, sizeof(val)) != 0) {
+        return 0;
+    }
+    return val;
+}
+#elif defined(USE_DYN_GETAUXVAL)
 static unsigned long getauxval_procfs(unsigned long type)
 {
     int fd = open("/proc/self/auxv", O_RDONLY);
@@ -830,7 +839,7 @@ template<typename T, typename F>
 static inline bool try_read_procfs_line(llvm::StringRef line, const char *prefix, T &out,
                                         bool &flag, F &&reset)
 {
-    if (!line.startswith(prefix))
+    if (!line.starts_with(prefix))
         return false;
     if (flag)
         reset();
diff --git a/src/rtutils.c b/src/rtutils.c
index 4a2e5c230883e..85a9be5e0b1da 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -269,10 +269,11 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh)
     // `eh` may be not equal to `ct->eh`. See `jl_pop_handler`
     // This function should **NOT** have any safepoint before the ones at the
     // end.
-    sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
+    jl_ptls_t ptls = ct->ptls;
+    sig_atomic_t old_defer_signal = ptls->defer_signal;
     ct->eh = eh->prev;
     ct->gcstack = eh->gcstack;
-    small_arraylist_t *locks = &ct->ptls->locks;
+    small_arraylist_t *locks = &ptls->locks;
     int unlocks = locks->len > eh->locks_len;
     if (unlocks) {
         for (size_t i = locks->len; i > eh->locks_len; i--)
@@ -280,14 +281,26 @@ JL_DLLEXPORT void jl_eh_restore_state(jl_task_t *ct, jl_handler_t *eh)
         locks->len = eh->locks_len;
     }
     ct->world_age = eh->world_age;
-    ct->ptls->defer_signal = eh->defer_signal;
-    int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    ptls->defer_signal = eh->defer_signal;
+    int8_t old_gc_state = jl_atomic_load_relaxed(&ptls->gc_state);
     if (old_gc_state != eh->gc_state)
-        jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state);
+        jl_atomic_store_release(&ptls->gc_state, eh->gc_state);
     if (!old_gc_state || !eh->gc_state) // it was or is unsafe now
-        jl_gc_safepoint_(ct->ptls);
+        jl_gc_safepoint_(ptls);
+    jl_value_t *exception = ptls->sig_exception;
+    if (exception) {
+        int8_t oldstate = jl_gc_unsafe_enter(ptls);
+        /* The temporary ptls->bt_data is rooted by special purpose code in the
+        GC. This exists only for the purpose of preserving bt_data until we
+        set ptls->bt_size=0 below. */
+        jl_push_excstack(ct, &ct->excstack, exception,
+                         ptls->bt_data, ptls->bt_size);
+        ptls->bt_size = 0;
+        ptls->sig_exception = NULL;
+        jl_gc_unsafe_leave(ptls, oldstate);
+    }
     if (old_defer_signal && !eh->defer_signal)
-        jl_sigint_safepoint(ct->ptls);
+        jl_sigint_safepoint(ptls);
     if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers) &&
             unlocks && eh->locks_len == 0) {
         jl_gc_run_pending_finalizers(ct);
@@ -553,7 +566,7 @@ JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT
     if (jl_base_module == NULL)
         return NULL;
     jl_binding_t *stderr_obj = jl_get_module_binding(jl_base_module, jl_symbol("stderr"), 0);
-    return stderr_obj ? jl_atomic_load_relaxed(&stderr_obj->value) : NULL;
+    return stderr_obj ? jl_get_binding_value(stderr_obj) : NULL;
 }
 
 // toys for debugging ---------------------------------------------------------
@@ -648,12 +661,10 @@ static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT
     jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL;
     if (globname && dv->name->module) {
         jl_binding_t *b = jl_get_module_binding(dv->name->module, globname, 0);
-        if (b && jl_atomic_load_relaxed(&b->owner) && b->constp) {
-            jl_value_t *bv = jl_atomic_load_relaxed(&b->value);
-            // The `||` makes this function work for both function instances and function types.
-            if (bv == v || jl_typeof(bv) == v)
-                return 1;
-        }
+        jl_value_t *bv = jl_get_binding_value_if_const(b);
+        // The `||` makes this function work for both function instances and function types.
+        if (bv && (bv == v || jl_typeof(bv) == v))
+            return 1;
     }
     return 0;
 }
@@ -1403,6 +1414,7 @@ size_t jl_static_show_func_sig_(JL_STREAM *s, jl_value_t *type, jl_static_show_c
         return n;
     }
     if ((jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) &&
+            ((jl_datatype_t*)ftype)->name->mt &&
             ((jl_datatype_t*)ftype)->name->mt != jl_type_type_mt &&
             ((jl_datatype_t*)ftype)->name->mt != jl_nonfunction_mt) {
         n += jl_static_show_symbol(s, ((jl_datatype_t*)ftype)->name->mt->name);
diff --git a/src/scheduler.c b/src/scheduler.c
index 5c885dd2f3b76..7e23f654c2566 100644
--- a/src/scheduler.c
+++ b/src/scheduler.c
@@ -7,7 +7,6 @@
 
 #include "julia.h"
 #include "julia_internal.h"
-#include "gc.h"
 #include "threading.h"
 
 #ifdef __cplusplus
@@ -32,7 +31,7 @@ static const int16_t sleeping_like_the_dead JL_UNUSED = 2;
 // a running count of how many threads are currently not_sleeping
 // plus a running count of the number of in-flight wake-ups
 // n.b. this may temporarily exceed jl_n_threads
-static _Atomic(int) nrunning = 0;
+_Atomic(int) n_threads_running = 0;
 
 // invariant: No thread is ever asleep unless sleep_check_state is sleeping (or we have a wakeup signal pending).
 // invariant: Any particular thread is not asleep unless that thread's sleep_check_state is sleeping.
@@ -81,19 +80,6 @@ JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSA
     return 1;
 }
 
-// GC functions used
-extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache,
-                                         jl_gc_markqueue_t *mq, jl_value_t *obj) JL_NOTSAFEPOINT;
-
-// parallel task runtime
-// ---
-
-JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max)
-{
-    jl_ptls_t ptls = jl_current_task->ptls;
-    return cong(max, &ptls->rngseed);
-}
-
 // initialize the threading infrastructure
 // (called only by the main thread)
 void jl_init_threadinginfra(void)
@@ -112,93 +98,6 @@ void jl_init_threadinginfra(void)
 
 void JL_NORETURN jl_finish_task(jl_task_t *ct);
 
-#ifndef MMTK_GC
-
-static inline int may_mark(void) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&gc_n_threads_marking) > 0);
-}
-
-static inline int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&ptls->gc_tls.gc_sweeps_requested) > 0);
-}
-
-// parallel gc thread function
-void jl_parallel_gc_threadfun(void *arg)
-{
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
-
-    // initialize this thread (set tid and create heap)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
-    void *stack_lo, *stack_hi;
-    jl_init_stack_limits(0, &stack_lo, &stack_hi);
-    // warning: this changes `jl_current_task`, so be careful not to call that from this function
-    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
-    JL_GC_PROMISE_ROOTED(ct);
-    (void)jl_atomic_fetch_add_relaxed(&nrunning, -1);
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_PARALLEL_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
-    uv_barrier_wait(targ->barrier);
-
-    // free the thread argument here
-    free(targ);
-
-    while (1) {
-        uv_mutex_lock(&gc_threads_lock);
-        while (!may_mark() && !may_sweep(ptls)) {
-            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
-        }
-        uv_mutex_unlock(&gc_threads_lock);
-        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
-        gc_mark_loop_parallel(ptls, 0);
-        if (may_sweep(ptls)) {
-            assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_PARALLEL_COLLECTOR_THREAD);
-            gc_sweep_pool_parallel(ptls);
-            jl_atomic_fetch_add(&ptls->gc_tls.gc_sweeps_requested, -1);
-        }
-    }
-}
-
-// concurrent gc thread function
-void jl_concurrent_gc_threadfun(void *arg)
-{
-    jl_threadarg_t *targ = (jl_threadarg_t*)arg;
-
-    // initialize this thread (set tid and create heap)
-    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
-    void *stack_lo, *stack_hi;
-    jl_init_stack_limits(0, &stack_lo, &stack_hi);
-    // warning: this changes `jl_current_task`, so be careful not to call that from this function
-    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
-    JL_GC_PROMISE_ROOTED(ct);
-    (void)jl_atomic_fetch_add_relaxed(&nrunning, -1);
-    // wait for all threads
-    jl_gc_state_set(ptls, JL_GC_CONCURRENT_COLLECTOR_THREAD, JL_GC_STATE_UNSAFE);
-    uv_barrier_wait(targ->barrier);
-
-    // free the thread argument here
-    free(targ);
-
-    while (1) {
-        assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_CONCURRENT_COLLECTOR_THREAD);
-        uv_sem_wait(&gc_sweep_assists_needed);
-        gc_free_pages();
-    }
-}
-
-#else
-void jl_parallel_gc_threadfun(void *arg)
-{
-    mmtk_unreachable();
-}
-
-void jl_concurrent_gc_threadfun(void *arg)
-{
-    mmtk_unreachable();
-}
-#endif
-
 // thread function: used by all mutator threads except the main thread
 void jl_threadfun(void *arg)
 {
@@ -232,9 +131,9 @@ void jl_init_thread_scheduler(jl_ptls_t ptls) JL_NOTSAFEPOINT
     // record that there is now another thread that may be used to schedule work
     // we will decrement this again in scheduler_delete_thread, only slightly
     // in advance of pthread_join (which hopefully itself also had been
-    // adopted by now and is included in nrunning too)
-    (void)jl_atomic_fetch_add_relaxed(&nrunning, 1);
-    // n.b. this is the only point in the code where we ignore the invariants on the ordering of nrunning
+    // adopted by now and is included in n_threads_running too)
+    (void)jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+    // n.b. this is the only point in the code where we ignore the invariants on the ordering of n_threads_running
     // since we are being initialized from foreign code, we could not necessarily have expected or predicted that to happen
 }
 
@@ -287,6 +186,21 @@ static int sleep_check_after_threshold(uint64_t *start_cycles) JL_NOTSAFEPOINT
     return 0;
 }
 
+void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    // equivalent to wake_thread, without the assert on wasrunning
+    int8_t state = jl_atomic_load_relaxed(&ptls->sleep_check_state);
+    if (state == sleeping) {
+        if (jl_atomic_cmpswap_relaxed(&ptls->sleep_check_state, &state, not_sleeping)) {
+            // this notification will never be consumed, so we may have now
+            // introduced some inaccuracy into the count, but that is
+            // unavoidable with any asynchronous interruption
+            jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
+        }
+    }
+}
+
+
 static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
     if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
@@ -294,7 +208,7 @@ static int set_not_sleeping(jl_ptls_t ptls) JL_NOTSAFEPOINT
             return 1;
         }
     }
-    int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1); // consume in-flight wakeup
+    int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1); // consume in-flight wakeup
     assert(wasrunning > 1); (void)wasrunning;
     return 0;
 }
@@ -306,7 +220,7 @@ static int wake_thread(int16_t tid) JL_NOTSAFEPOINT
     if (jl_atomic_load_relaxed(&ptls2->sleep_check_state) != not_sleeping) {
         int8_t state = sleeping;
         if (jl_atomic_cmpswap_relaxed(&ptls2->sleep_check_state, &state, not_sleeping)) {
-            int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1); // increment in-flight wakeup count
+            int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1); // increment in-flight wakeup count
             assert(wasrunning); (void)wasrunning;
             JL_PROBE_RT_SLEEP_CHECK_WAKE(ptls2, state);
             uv_mutex_lock(&ptls2->sleep_lock);
@@ -326,10 +240,7 @@ static void wake_libuv(void) JL_NOTSAFEPOINT
     JULIA_DEBUG_SLEEPWAKE( io_wakeup_leave = cycleclock() );
 }
 
-/* ensure thread tid is awake if necessary */
-JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
-{
-    jl_task_t *ct = jl_current_task;
+void wakeup_thread(jl_task_t *ct, int16_t tid) JL_NOTSAFEPOINT { // Pass in ptls when we have it already available to save a lookup
     int16_t self = jl_atomic_load_relaxed(&ct->tid);
     if (tid != self)
         jl_fence(); // [^store_buffering_1]
@@ -337,11 +248,11 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
     JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() );
     if (tid == self || tid == -1) {
         // we're already awake, but make sure we'll exit uv_run
-        // and that nrunning is updated if this is now considered in-flight
+        // and that n_threads_running is updated if this is now considered in-flight
         jl_ptls_t ptls = ct->ptls;
         if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
             if (jl_atomic_exchange_relaxed(&ptls->sleep_check_state, not_sleeping) != not_sleeping) {
-                int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
+                int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
                 assert(wasrunning); (void)wasrunning;
                 JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls);
             }
@@ -384,6 +295,12 @@ JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
     JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() );
 }
 
+/* ensure thread tid is awake if necessary */
+JL_DLLEXPORT void jl_wakeup_thread(int16_t tid) JL_NOTSAFEPOINT
+{
+    jl_task_t *ct = jl_current_task;
+    wakeup_thread(ct, tid);
+}
 
 // get the next runnable task
 static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
@@ -520,7 +437,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
                     // responsibility, so need to make sure thread 0 will take care
                     // of us.
                     if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) == NULL) // aka trylock
-                        jl_wakeup_thread(0);
+                        wakeup_thread(ct, 0);
                 }
                 if (uvlock) {
                     int enter_eventloop = may_sleep(ptls);
@@ -562,8 +479,8 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
                 }
 
                 // any thread which wants us running again will have to observe
-                // sleep_check_state==sleeping and increment nrunning for us
-                int wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, -1);
+                // sleep_check_state==sleeping and increment n_threads_running for us
+                int wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
                 assert(wasrunning);
                 isrunning = 0;
                 if (wasrunning == 1) {
@@ -587,8 +504,8 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
                 while (may_sleep(ptls)) {
                     if (ptls->tid == 0) {
                         task = wait_empty;
-                        if (task && jl_atomic_load_relaxed(&nrunning) == 0) {
-                            wasrunning = jl_atomic_fetch_add_relaxed(&nrunning, 1);
+                        if (task && jl_atomic_load_relaxed(&n_threads_running) == 0) {
+                            wasrunning = jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
                             assert(!wasrunning);
                             wasrunning = !set_not_sleeping(ptls);
                             assert(!wasrunning);
@@ -599,11 +516,11 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
                         }
                         task = NULL;
                     }
-                    // else should we warn the user of certain deadlock here if tid == 0 && nrunning == 0?
+                    // else should we warn the user of certain deadlock here if tid == 0 && n_threads_running == 0?
                     uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
                 }
                 assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
-                assert(jl_atomic_load_relaxed(&nrunning));
+                assert(jl_atomic_load_relaxed(&n_threads_running));
                 start_cycles = 0;
                 uv_mutex_unlock(&ptls->sleep_lock);
                 JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
@@ -617,7 +534,7 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q,
             JL_CATCH {
                 // probably SIGINT, but possibly a user mistake in trypoptask
                 if (!isrunning)
-                    jl_atomic_fetch_add_relaxed(&nrunning, 1);
+                    jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
                 set_not_sleeping(ptls);
                 jl_rethrow();
             }
@@ -636,7 +553,7 @@ void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
     int notsleeping = jl_atomic_exchange_relaxed(&ptls->sleep_check_state, sleeping_like_the_dead) == not_sleeping;
     jl_fence();
     if (notsleeping) {
-        if (jl_atomic_load_relaxed(&nrunning) == 1) {
+        if (jl_atomic_load_relaxed(&n_threads_running) == 1) {
             jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[jl_atomic_load_relaxed(&io_loop_tid)];
             // This was the last running thread, and there is no thread with !may_sleep
             // so make sure tid 0 is notified to check wait_empty
@@ -646,10 +563,10 @@ void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT
         }
     }
     else {
-        jl_atomic_fetch_add_relaxed(&nrunning, 1);
+        jl_atomic_fetch_add_relaxed(&n_threads_running, 1);
     }
-    jl_wakeup_thread(0); // force thread 0 to see that we do not have the IO lock (and am dead)
-    jl_atomic_fetch_add_relaxed(&nrunning, -1);
+    wakeup_thread(jl_atomic_load_relaxed(&ptls->current_task), 0); // force thread 0 to see that we do not have the IO lock (and am dead)
+    jl_atomic_fetch_add_relaxed(&n_threads_running, -1);
 }
 
 #ifdef __cplusplus
diff --git a/src/signal-handling.c b/src/signal-handling.c
index 3b8d98fbee588..d7f4697a3c4f0 100644
--- a/src/signal-handling.c
+++ b/src/signal-handling.c
@@ -155,7 +155,7 @@ static void jl_shuffle_int_array_inplace(int *carray, int size, uint64_t *seed)
     // The "modern Fisher–Yates shuffle" - O(n) algorithm
     // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
     for (int i = size; i-- > 1; ) {
-        size_t j = cong(i, seed);
+        size_t j = cong(i + 1, seed); // cong is an open interval so we add 1
         uint64_t tmp = carray[j];
         carray[j] = carray[i];
         carray[i] = tmp;
@@ -340,6 +340,8 @@ static uintptr_t jl_get_pc_from_ctx(const void *_ctx)
     return ((CONTEXT*)_ctx)->Rip;
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
     return ((ucontext_t*)_ctx)->uc_mcontext.pc;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    return ((ucontext_t*)_ctx)->uc_mcontext.mc_gpregs.gp_elr;
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     return ((ucontext_t*)_ctx)->uc_mcontext.arm_pc;
 #else
@@ -425,6 +427,8 @@ void jl_show_sigill(void *_ctx)
 #endif
 }
 
+void surprise_wakeup(jl_ptls_t ptls) JL_NOTSAFEPOINT;
+
 // make it invalid for a task to return from this point to its stack
 // this is generally quite an foolish operation, but does free you up to do
 // arbitrary things on this stack now without worrying about corrupt state that
@@ -437,15 +441,17 @@ void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT
         ct->eh = NULL;
         ct->world_age = 1;
         // Force all locks to drop. Is this a good idea? Of course not. But the alternative would probably deadlock instead of crashing.
-        small_arraylist_t *locks = &ct->ptls->locks;
+        jl_ptls_t ptls = ct->ptls;
+        small_arraylist_t *locks = &ptls->locks;
         for (size_t i = locks->len; i > 0; i--)
             jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
         locks->len = 0;
-        ct->ptls->in_pure_callback = 0;
-        ct->ptls->in_finalizer = 0;
-        ct->ptls->defer_signal = 0;
+        ptls->in_pure_callback = 0;
+        ptls->in_finalizer = 0;
+        ptls->defer_signal = 0;
         // forcibly exit GC (if we were in it) or safe into unsafe, without the mandatory safepoint
-        jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_UNSAFE);
+        jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_UNSAFE);
+        surprise_wakeup(ptls);
         // allow continuing to use a Task that should have already died--unsafe necromancy!
         jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
     }
@@ -459,6 +465,7 @@ void jl_critical_error(int sig, int si_code, bt_context_t *context, jl_task_t *c
     size_t i, n = ct ? *bt_size : 0;
     if (sig) {
         // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
+        // and also resets the state of ct and ptls so that some code can run on this task again
         jl_task_frame_noreturn(ct);
 #ifndef _OS_WINDOWS_
         sigset_t sset;
diff --git a/src/signals-mach.c b/src/signals-mach.c
index ad5788ea237e6..a939e4df71ae0 100644
--- a/src/signals-mach.c
+++ b/src/signals-mach.c
@@ -222,38 +222,92 @@ typedef arm_exception_state64_t host_exception_state_t;
 #define HOST_EXCEPTION_STATE_COUNT ARM_EXCEPTION_STATE64_COUNT
 #endif
 
-static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state,
-                             void (*fptr)(void))
+// create a fake function that describes the variable manipulations in jl_call_in_state
+__attribute__((naked)) static void fake_stack_pop(void)
 {
 #ifdef _CPU_X86_64_
-    uintptr_t rsp = state->__rsp;
+    __asm__ volatile (
+        "  .cfi_signal_frame\n"
+        "  .cfi_def_cfa %rsp, 0\n" // CFA here uses %rsp directly
+        "  .cfi_offset %rip, 0\n" // previous value of %rip at CFA
+        "  .cfi_offset %rsp, 8\n" // previous value of %rsp at CFA
+        "  nop\n"
+    );
 #elif defined(_CPU_AARCH64_)
-    uintptr_t rsp = state->__sp;
+    __asm__ volatile (
+        "  .cfi_signal_frame\n"
+        "  .cfi_def_cfa sp, 0\n" // use sp as fp here
+        "  .cfi_offset lr, 0\n"
+        "  .cfi_offset sp, 8\n"
+        // Anything else got smashed, since we didn't explicitly copy all of the
+        // state object to the stack (to build a real sigreturn frame).
+        // This is also not quite valid, since the AArch64 DWARF spec lacks the ability to define how to restore the LR register correctly,
+        // so normally libunwind implementations on linux detect this function specially and hack around the invalid info:
+        // https://github.com/llvm/llvm-project/commit/c82deed6764cbc63966374baf9721331901ca958
+        " nop\n"
+    );
 #else
-#error "julia: throw-in-context not supported on this platform"
+CFI_NORETURN
 #endif
-    if (ptls2 == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) {
-        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
-    }
-    else {
-        rsp = (uintptr_t)ptls2->signal_stack + (ptls2->signal_stack_size ? ptls2->signal_stack_size : sig_stack_size);
-    }
-    assert(rsp % 16 == 0);
-    rsp -= 16;
+}
 
+static void jl_call_in_state(host_thread_state_t *state, void (*fptr)(void))
+{
 #ifdef _CPU_X86_64_
-    rsp -= sizeof(void*);
-    state->__rsp = rsp; // set stack pointer
+    uintptr_t sp = state->__rsp;
+#elif defined(_CPU_AARCH64_)
+    uintptr_t sp = state->__sp;
+#endif
+    sp = (sp - 256) & ~(uintptr_t)15; // redzone and re-alignment
+    assert(sp % 16 == 0);
+    sp -= 16;
+#ifdef _CPU_X86_64_
+    // set return address to NULL
+    *(uintptr_t*)sp = 0;
+    // pushq %sp
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__rsp;
+    // pushq %rip
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__rip;
+    // pushq .fake_stack_pop + 1; aka call from fake_stack_pop
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = (uintptr_t)&fake_stack_pop + 1;
+    state->__rsp = sp; // set stack pointer
     state->__rip = (uint64_t)fptr; // "call" the function
 #elif defined(_CPU_AARCH64_)
-    state->__sp = rsp;
-    state->__pc = (uint64_t)fptr;
-    state->__lr = 0;
+    // push {%sp, %pc + 4}
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = state->__sp;
+    sp -= sizeof(void*);
+    *(uintptr_t*)sp = (uintptr_t)state->__pc;
+    state->__sp = sp; // x31
+    state->__pc = (uint64_t)fptr; // pc
+    state->__lr = (uintptr_t)&fake_stack_pop + 4; // x30
 #else
 #error "julia: throw-in-context not supported on this platform"
 #endif
 }
 
+static void jl_longjmp_in_state(host_thread_state_t *state, jl_jmp_buf jmpbuf)
+{
+
+    if (!jl_simulate_longjmp(jmpbuf, (bt_context_t*)state)) {
+        // for sanitizer builds, fallback to calling longjmp on the original stack
+        // (this will fail for stack overflow, but that is hardly sanitizer-legal anyways)
+#ifdef _CPU_X86_64_
+    state->__rdi = (uintptr_t)jmpbuf;
+    state->__rsi = 1;
+#elif defined(_CPU_AARCH64_)
+    state->__x[0] = (uintptr_t)jmpbuf;
+    state->__x[1] = 1;
+#else
+#error "julia: jl_longjmp_in_state not supported on this platform"
+#endif
+        jl_call_in_state(state, (void (*)(void))longjmp);
+    }
+}
+
 #ifdef _CPU_X86_64_
 int is_write_fault(host_exception_state_t exc_state) {
     return exc_reg_is_write_fault(exc_state.__err);
@@ -275,14 +329,26 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t *
     host_thread_state_t state;
     kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count);
     HANDLE_MACH_ERROR("thread_get_state", ret);
-    if (1) { // XXX: !jl_has_safe_restore(ptls2)
+    if (ptls2->safe_restore) {
+        jl_longjmp_in_state(&state, *ptls2->safe_restore);
+    }
+    else {
         assert(exception);
         ptls2->bt_size =
             rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state,
-                              NULL /*current_task?*/);
+                            NULL /*current_task?*/);
         ptls2->sig_exception = exception;
+        ptls2->io_wait = 0;
+        jl_task_t *ct = ptls2->current_task;
+        jl_handler_t *eh = ct->eh;
+        if (eh != NULL) {
+            asan_unpoison_task_stack(ct, &eh->eh_ctx);
+            jl_longjmp_in_state(&state, eh->eh_ctx);
+        }
+        else {
+            jl_no_exc_handler(exception, ct);
+        }
     }
-    jl_call_in_state(ptls2, &state, &jl_sig_throw);
     ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
 }
@@ -290,14 +356,18 @@ static void jl_throw_in_thread(jl_ptls_t ptls2, mach_port_t thread, jl_value_t *
 static void segv_handler(int sig, siginfo_t *info, void *context)
 {
     assert(sig == SIGSEGV || sig == SIGBUS);
-    if (jl_get_safe_restore()) { // restarting jl_ or jl_unwind_stepn
-        jl_task_t *ct = jl_get_current_task();
-        jl_ptls_t ptls = ct == NULL ? NULL : ct->ptls;
-        jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or jl_unwind_stepn
+        jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(context), *saferestore);
+        return;
     }
-    else {
-        sigdie_handler(sig, info, context);
+    jl_task_t *ct = jl_get_current_task();
+    if ((sig != SIGBUS || info->si_code == BUS_ADRERR) &&
+    !(ct == NULL || ct->ptls == NULL || jl_atomic_load_relaxed(&ct->ptls->gc_state) == JL_GC_STATE_WAITING || ct->eh == NULL)
+    && is_addr_on_stack(ct, info->si_addr)) { // stack overflow and not a BUS_ADRALN (alignment error)
+        stack_overflow_warning();
     }
+    sigdie_handler(sig, info, context);
 }
 
 // n.b. mach_exc_server expects us to define this symbol locally
@@ -349,12 +419,10 @@ kern_return_t catch_mach_exception_raise(
         jl_safe_printf("ERROR: Exception handler triggered on unmanaged thread.\n");
         return KERN_INVALID_ARGUMENT;
     }
-    // XXX: jl_throw_in_thread or segv_handler will eventually check this, but
-    //      we would like to avoid some of this work if we could detect this earlier
-    // if (jl_has_safe_restore(ptls2)) {
-    //     jl_throw_in_thread(ptls2, thread, NULL);
-    //     return KERN_SUCCESS;
-    // }
+    if (ptls2->safe_restore) {
+        jl_throw_in_thread(ptls2, thread, NULL);
+        return KERN_SUCCESS;
+    }
     if (jl_atomic_load_acquire(&ptls2->gc_state) == JL_GC_STATE_WAITING)
         return KERN_FAILURE;
     if (exception == EXC_ARITHMETIC) {
@@ -513,7 +581,6 @@ static void jl_try_deliver_sigint(void)
 
 static void JL_NORETURN jl_exit_thread0_cb(int signo)
 {
-CFI_NORETURN
     jl_critical_error(signo, 0, NULL, jl_current_task);
     jl_atexit_hook(128);
     jl_raise(signo);
@@ -545,7 +612,7 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
 #else
 #error Fill in first integer argument here
 #endif
-    jl_call_in_state(ptls2, &state, (void (*)(void))&jl_exit_thread0_cb);
+    jl_call_in_state(&state, (void (*)(void))&jl_exit_thread0_cb);
     unsigned int count = MACH_THREAD_STATE_COUNT;
     ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
@@ -735,16 +802,16 @@ void *mach_profile_listener(void *arg)
 #endif
                 jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[i];
 
-                // store threadid but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
 
-                // store task id (never null)
+                // META_OFFSET_TASKID store task id (never null)
                 bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
 
-                // store cpu cycle clock
+                // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
                 bt_data_prof[bt_size_cur++].uintptr = cycleclock();
 
-                // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_SLEEPSTATE store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
 
                 // Mark the end of this block with two 0's
diff --git a/src/signals-unix.c b/src/signals-unix.c
index eedc5f0aae94a..f99eca31730b6 100644
--- a/src/signals-unix.c
+++ b/src/signals-unix.c
@@ -44,7 +44,7 @@ static const size_t sig_stack_size = 8 * 1024 * 1024;
 
 // helper function for returning the unw_context_t inside a ucontext_t
 // (also used by stackwalk.c)
-bt_context_t *jl_to_bt_context(void *sigctx)
+bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT
 {
 #ifdef __APPLE__
     return (bt_context_t*)&((ucontext64_t*)sigctx)->uc_mcontext64->__ss;
@@ -62,7 +62,11 @@ bt_context_t *jl_to_bt_context(void *sigctx)
 static int thread0_exit_count = 0;
 static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size);
 
-static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT;
+static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf);
+
+#if !defined(_OS_DARWIN_)
+static inline uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
 {
 #if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
@@ -76,15 +80,12 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
     return ctx->uc_mcontext.arm_sp;
-#elif defined(_OS_DARWIN_) && defined(_CPU_X86_64_)
-    const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
-    return ctx->uc_mcontext64->__ss.__rsp;
-#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
-    const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
-    return ctx->uc_mcontext64->__ss.__sp;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
     const ucontext_t *ctx = (const ucontext_t*)_ctx;
     return ctx->uc_mcontext.mc_rsp;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    const ucontext_t *ctx = (const ucontext_t*)_ctx;
+    return ctx->uc_mcontext.mc_gpregs.gp_sp;
 #elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
     const struct sigcontext *ctx = (const struct sigcontext *)_ctx;
     return ctx->sc_rsp;
@@ -94,7 +95,7 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *
 #endif
 }
 
-static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
+static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) JL_NOTSAFEPOINT
 {
     // One guard page for signal_stack.
     return ptls->signal_stack == NULL ||
@@ -102,10 +103,8 @@ static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
             (char*)ptr <= (char*)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size));
 }
 
-// Modify signal context `_ctx` so that `fptr` will execute when the signal
-// returns. `fptr` will execute on the signal stack, and must not return.
-// jl_call_in_ctx is also currently executing on that signal stack,
-// so be careful not to smash it
+// Modify signal context `_ctx` so that `fptr` will execute when the signal returns
+// The function `fptr` itself must not return.
 JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx)
 {
     // Modifying the ucontext should work but there is concern that
@@ -115,44 +114,36 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     // checks that the syscall is made in the signal handler and that
     // the ucontext address is valid. Hopefully the value of the ucontext
     // will not be part of the validation...
-    if (!ptls) {
-        sigset_t sset;
-        sigemptyset(&sset);
-        sigaddset(&sset, sig);
-        pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
-        fptr();
-        return;
-    }
     uintptr_t rsp = jl_get_rsp_from_ctx(_ctx);
-    if (is_addr_on_sigstack(ptls, (void*)rsp))
-        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
-    else
-        rsp = (uintptr_t)ptls->signal_stack + (ptls->signal_stack_size ? ptls->signal_stack_size : sig_stack_size);
-    assert(rsp % 16 == 0);
-    rsp -= 16;
+    rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
 #if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.gregs[REG_RSP] = rsp;
     ctx->uc_mcontext.gregs[REG_RIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.mc_rsp = rsp;
     ctx->uc_mcontext.mc_rip = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.gregs[REG_ESP] = rsp;
     ctx->uc_mcontext.gregs[REG_EIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->uc_mcontext.mc_esp = rsp;
     ctx->uc_mcontext.mc_eip = (uintptr_t)fptr;
 #elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
     struct sigcontext *ctx = (struct sigcontext *)_ctx;
     rsp -= sizeof(void*);
+    *(uintptr_t*)rsp = 0;
     ctx->sc_rsp = rsp;
     ctx->sc_rip = fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
@@ -160,6 +151,11 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     ctx->uc_mcontext.sp = rsp;
     ctx->uc_mcontext.regs[29] = 0; // Clear link register (x29)
     ctx->uc_mcontext.pc = (uintptr_t)fptr;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+    ucontext_t *ctx = (ucontext_t*)_ctx;
+    ctx->uc_mcontext.mc_gpregs.gp_sp = rsp;
+    ctx->uc_mcontext.mc_gpregs.gp_x[29] = 0; // Clear link register (x29)
+    ctx->uc_mcontext.mc_gpregs.gp_elr = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_ARM_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     uintptr_t target = (uintptr_t)fptr;
@@ -179,22 +175,6 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     ctx->uc_mcontext.arm_sp = rsp;
     ctx->uc_mcontext.arm_lr = 0; // Clear link register
     ctx->uc_mcontext.arm_pc = target;
-#elif defined(_OS_DARWIN_) && (defined(_CPU_X86_64_) || defined(_CPU_AARCH64_))
-    // Only used for SIGFPE.
-    // This doesn't seems to be reliable when the SIGFPE is generated
-    // from a divide-by-zero exception, which is now handled by
-    // `catch_exception_raise`. It works fine when a signal is received
-    // due to `kill`/`raise` though.
-    ucontext64_t *ctx = (ucontext64_t*)_ctx;
-#if defined(_CPU_X86_64_)
-    rsp -= sizeof(void*);
-    ctx->uc_mcontext64->__ss.__rsp = rsp;
-    ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr;
-#else
-    ctx->uc_mcontext64->__ss.__sp = rsp;
-    ctx->uc_mcontext64->__ss.__pc = (uintptr_t)fptr;
-    ctx->uc_mcontext64->__ss.__lr = 0;
-#endif
 #else
 #pragma message("julia: throw-in-context not supported on this platform")
     // TODO Add support for PowerPC(64)?
@@ -205,30 +185,38 @@ JL_NO_ASAN static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int si
     fptr();
 #endif
 }
+#endif
 
 static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *e, int sig, void *sigctx)
 {
     jl_ptls_t ptls = ct->ptls;
-    if (!jl_get_safe_restore()) {
-        ptls->bt_size =
-            rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
-                              ct->gcstack);
-        ptls->sig_exception = e;
+    assert(!jl_get_safe_restore());
+    ptls->bt_size =
+        rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
+                            ct->gcstack);
+    ptls->sig_exception = e;
+    ptls->io_wait = 0;
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        jl_longjmp_in_ctx(sig, sigctx, eh->eh_ctx);
+    }
+    else {
+        jl_no_exc_handler(e, ct);
     }
-    jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx);
 }
 
 static pthread_t signals_thread;
 
-static int is_addr_on_stack(jl_task_t *ct, void *addr)
+static int is_addr_on_stack(jl_task_t *ct, void *addr) JL_NOTSAFEPOINT
 {
-    if (ct->copy_stack) {
+    if (ct->ctx.copy_stack) {
         jl_ptls_t ptls = ct->ptls;
         return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize &&
                 (char*)addr < (char*)ptls->stackbase);
     }
-    return ((char*)addr > (char*)ct->stkbuf &&
-            (char*)addr < (char*)ct->stkbuf + ct->bufsz);
+    return ((char*)addr > (char*)ct->ctx.stkbuf &&
+            (char*)addr < (char*)ct->ctx.stkbuf + ct->ctx.bufsz);
 }
 
 static void sigdie_handler(int sig, siginfo_t *info, void *context)
@@ -306,6 +294,8 @@ int exc_reg_is_write_fault(uintptr_t esr) {
 #if defined(HAVE_MACH)
 #include "signals-mach.c"
 #else
+#include <poll.h>
+#include <sys/eventfd.h>
 
 int jl_lock_stackwalk(void)
 {
@@ -350,6 +340,13 @@ int is_write_fault(void *context) {
     ucontext_t *ctx = (ucontext_t*)context;
     return exc_reg_is_write_fault(ctx->uc_mcontext.mc_err);
 }
+#elif defined(_OS_FREEBSD_) && defined(_CPU_AARCH64_)
+// FreeBSD seems not to expose a means of accessing ESR via `ucontext_t` on AArch64.
+// TODO: Is there an alternative approach that can be taken? ESR may become accessible
+// in a future release though.
+int is_write_fault(void *context) {
+    return 0;
+}
 #elif defined(_OS_OPENBSD_) && defined(_CPU_X86_64_)
 int is_write_fault(void *context) {
     struct sigcontext *ctx = (struct sigcontext *)context;
@@ -362,7 +359,7 @@ int is_write_fault(void *context) {
 }
 #endif
 
-static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
+static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context) JL_NOTSAFEPOINT
 {
     return (ptls->signal_stack != NULL &&
             is_addr_on_sigstack(ptls, ptr) &&
@@ -372,8 +369,9 @@ static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
 JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
 {
     assert(sig == SIGSEGV || sig == SIGBUS);
-    if (jl_get_safe_restore()) { // restarting jl_ or profile
-        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        jl_longjmp_in_ctx(sig, context, *saferestore);
         return;
     }
     jl_task_t *ct = jl_get_current_task();
@@ -424,17 +422,15 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context)
     }
 }
 
-#if !defined(JL_DISABLE_LIBUNWIND)
-static bt_context_t *signal_context;
-pthread_mutex_t in_signal_lock;
-static pthread_cond_t exit_signal_cond;
-static pthread_cond_t signal_caught_cond;
+pthread_mutex_t in_signal_lock; // shared with jl_delete_thread
+static bt_context_t *signal_context; // protected by in_signal_lock
+static int exit_signal_cond = -1;
+static int signal_caught_cond = -1;
+static int signals_inflight = 0;
 
 int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
 {
-    struct timespec ts;
-    clock_gettime(CLOCK_REALTIME, &ts);
-    ts.tv_sec += timeout;
+    int err;
     pthread_mutex_lock(&in_signal_lock);
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
     jl_task_t *ct2 = ptls2 ? jl_atomic_load_relaxed(&ptls2->current_task) : NULL;
@@ -443,48 +439,74 @@ int jl_thread_suspend_and_get_state(int tid, int timeout, bt_context_t *ctx)
         pthread_mutex_unlock(&in_signal_lock);
         return 0;
     }
-    jl_atomic_store_release(&ptls2->signal_request, 1);
-    pthread_kill(ptls2->system_id, SIGUSR2);
-    // wait for thread to acknowledge
-    int err = pthread_cond_timedwait(&signal_caught_cond, &in_signal_lock, &ts);
-    if (err == ETIMEDOUT) {
-        sig_atomic_t request = 1;
-        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
+    while (signals_inflight) {
+        // something is wrong, or there is already a usr2 in flight elsewhere
+        // try to wait for it to finish or wait for timeout
+        struct pollfd event = {signal_caught_cond, POLLIN, 0};
+        do {
+            err = poll(&event, 1, timeout * 1000);
+        } while (err == -1 && errno == EINTR);
+        if (err == -1 || (event.revents & POLLIN) == 0) {
+            // not ready after timeout: cancel this request
             pthread_mutex_unlock(&in_signal_lock);
             return 0;
         }
-        // Request is either now 0 (meaning the other thread is waiting for
-        //   exit_signal_cond already),
-        // Or it is now -1 (meaning the other thread
-        //   is waiting for in_signal_lock, and we need to release that lock
-        //   here for a bit, until the other thread has a chance to get to the
-        //   exit_signal_cond)
-        if (request == -1) {
-            err = pthread_cond_wait(&signal_caught_cond, &in_signal_lock);
-            assert(!err);
+        // consume it before continuing
+        eventfd_t got;
+        do {
+            err = read(signal_caught_cond, &got, sizeof(eventfd_t));
+        } while (err == -1 && errno == EINTR);
+        if (err != sizeof(eventfd_t)) abort();
+        assert(signals_inflight >= got);
+        signals_inflight -= got;
+    }
+    signals_inflight++;
+    sig_atomic_t request = jl_atomic_exchange(&ptls2->signal_request, 1);
+    assert(request == 0 || request == -1);
+    request = 1;
+    err = pthread_kill(ptls2->system_id, SIGUSR2);
+    if (err == 0) {
+        // wait for thread to acknowledge or timeout
+        struct pollfd event = {signal_caught_cond, POLLIN, 0};
+        do {
+            err = poll(&event, 1, timeout * 1000);
+        } while (err == -1 && errno == EINTR);
+        if (err != 1 || (event.revents & POLLIN) == 0)
+            err = -1;
+    }
+    if (err == -1) {
+        // not ready after timeout: try to cancel this request
+        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
+            signals_inflight--;
+            pthread_mutex_unlock(&in_signal_lock);
+            return 0;
         }
     }
+    eventfd_t got;
+    do {
+        err = read(signal_caught_cond, &got, sizeof(eventfd_t));
+    } while (err == -1 && errno == EINTR);
+    if (err != sizeof(eventfd_t)) abort();
+    assert(signals_inflight >= got);
+    signals_inflight -= got;
+    signals_inflight++;
     // Now the other thread is waiting on exit_signal_cond (verify that here by
     // checking it is 0, and add an acquire barrier for good measure)
-    int request = jl_atomic_load_acquire(&ptls2->signal_request);
-    assert(request == 0); (void) request;
-    jl_atomic_store_release(&ptls2->signal_request, 1); // prepare to resume normally
+    request = jl_atomic_load_acquire(&ptls2->signal_request);
+    assert(request == 0 || request == -1); (void) request;
+    jl_atomic_store_release(&ptls2->signal_request, 4); // prepare to resume normally, but later code may change this
     *ctx = *signal_context;
     return 1;
 }
 
 void jl_thread_resume(int tid)
 {
-    jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[tid];
-    pthread_cond_broadcast(&exit_signal_cond);
-    pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge (so that signal_request doesn't get mixed up)
-    // The other thread is waiting to leave exit_signal_cond (verify that here by
-    // checking it is 0, and add an acquire barrier for good measure)
-    int request = jl_atomic_load_acquire(&ptls2->signal_request);
-    assert(request == 0); (void) request;
+    int err;
+    eventfd_t got = 1;
+    err = write(exit_signal_cond, &got, sizeof(eventfd_t));
+    if (err != sizeof(eventfd_t)) abort();
     pthread_mutex_unlock(&in_signal_lock);
 }
-#endif
 
 // Throw jl_interrupt_exception if the master thread is in a signal async region
 // or if SIGINT happens too often.
@@ -493,9 +515,12 @@ static void jl_try_deliver_sigint(void)
     jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
     jl_safepoint_enable_sigint();
     jl_wake_libuv();
+    pthread_mutex_lock(&in_signal_lock);
+    signals_inflight++;
     jl_atomic_store_release(&ptls2->signal_request, 2);
     // This also makes sure `sleep` is aborted.
     pthread_kill(ptls2->system_id, SIGUSR2);
+    pthread_mutex_unlock(&in_signal_lock);
 }
 
 // Write only by signal handling thread, read only by main thread
@@ -528,12 +553,13 @@ static void jl_exit_thread0(int signo, jl_bt_element_t *bt_data, size_t bt_size)
 }
 
 // request:
-// -1: beginning processing [invalid outside here]
+// -1: processing
 //  0: nothing [not from here]
-//  1: get state
+//  1: get state & wait for request
 //  2: throw sigint if `!defer_signal && io_wait` or if force throw threshold
 //     is reached
 //  3: raise `thread0_exit_signo` and try to exit
+//  4: no-op
 void usr2_handler(int sig, siginfo_t *info, void *ctx)
 {
     jl_task_t *ct = jl_get_current_task();
@@ -543,26 +569,36 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
     if (ptls == NULL)
         return;
     int errno_save = errno;
-    // acknowledge that we saw the signal_request
-    sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, -1);
-#if !defined(JL_DISABLE_LIBUNWIND)
+    sig_atomic_t request = jl_atomic_load(&ptls->signal_request);
+    if (request == 0)
+        return;
+    if (!jl_atomic_cmpswap(&ptls->signal_request, &request, -1))
+        return;
     if (request == 1) {
-        pthread_mutex_lock(&in_signal_lock);
         signal_context = jl_to_bt_context(ctx);
-        // acknowledge that we set the signal_caught_cond broadcast
-        request = jl_atomic_exchange(&ptls->signal_request, 0);
-        assert(request == -1); (void) request;
-        pthread_cond_broadcast(&signal_caught_cond);
-        pthread_cond_wait(&exit_signal_cond, &in_signal_lock);
-        request = jl_atomic_exchange(&ptls->signal_request, 0);
-        assert(request == 1 || request == 3);
-        // acknowledge that we got the resume signal
-        pthread_cond_broadcast(&signal_caught_cond);
-        pthread_mutex_unlock(&in_signal_lock);
+        // acknowledge that we saw the signal_request and set signal_context
+        int err;
+        eventfd_t got = 1;
+        err = write(signal_caught_cond, &got, sizeof(eventfd_t));
+        if (err != sizeof(eventfd_t)) abort();
+        sig_atomic_t processing = -1;
+        jl_atomic_cmpswap(&ptls->signal_request, &processing, 0);
+        // wait for exit signal
+        do {
+            err = read(exit_signal_cond, &got, sizeof(eventfd_t));
+        } while (err == -1 && errno == EINTR);
+        if (err != sizeof(eventfd_t)) abort();
+        assert(got == 1);
+        request = jl_atomic_exchange(&ptls->signal_request, -1);
+        signal_context = NULL;
+        assert(request == 2 || request == 3 || request == 4);
     }
-    else
-#endif
-    jl_atomic_exchange(&ptls->signal_request, 0); // returns -1
+    int err;
+    eventfd_t got = 1;
+    err = write(signal_caught_cond, &got, sizeof(eventfd_t));
+    if (err != sizeof(eventfd_t)) abort();
+    sig_atomic_t processing = -1;
+    jl_atomic_cmpswap(&ptls->signal_request, &processing, 0);
     if (request == 2) {
         int force = jl_check_force_sigint();
         if (force || (!ptls->defer_signal && ptls->io_wait)) {
@@ -571,7 +607,11 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
                 jl_safe_printf("WARNING: Force throwing a SIGINT\n");
             // Force a throw
             jl_clear_force_sigint();
-            jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
+            jl_jmp_buf *saferestore = jl_get_safe_restore();
+            if (saferestore) // restarting jl_ or profile
+                jl_longjmp_in_ctx(sig, ctx, *saferestore);
+            else
+                jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
         }
     }
     else if (request == 3) {
@@ -950,16 +990,16 @@ static void *signal_listener(void *arg)
 
                         jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[i];
 
-                        // store threadid but add 1 as 0 is preserved to indicate end of block
+                        // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
                         bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1;
 
-                        // store task id (never null)
+                        // META_OFFSET_TASKID store task id (never null)
                         bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);
 
-                        // store cpu cycle clock
+                        // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
                         bt_data_prof[bt_size_cur++].uintptr = cycleclock();
 
-                        // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                        // META_OFFSET_SLEEPSTATE store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
                         bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls2->sleep_check_state) + 1;
 
                         // Mark the end of this block with two 0's
@@ -996,12 +1036,12 @@ static void *signal_listener(void *arg)
         else if (critical) {
             // critical in this case actually means SIGINFO request
 #ifndef SIGINFO // SIGINFO already prints something similar automatically
-            int nrunning = 0;
+            int n_threads_running = 0;
             for (int idx = nthreads; idx-- > 0; ) {
                 jl_ptls_t ptls2 = jl_atomic_load_relaxed(&jl_all_tls_states)[idx];
-                nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
+                n_threads_running += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
             }
-            jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, nthreads);
+            jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), n_threads_running, nthreads);
 #endif
 
             jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
@@ -1023,10 +1063,12 @@ void restore_signals(void)
     jl_sigsetset(&sset);
     pthread_sigmask(SIG_SETMASK, &sset, 0);
 
-#if !defined(HAVE_MACH) && !defined(JL_DISABLE_LIBUNWIND)
+#if !defined(HAVE_MACH)
+    exit_signal_cond = eventfd(0, EFD_CLOEXEC);
+    signal_caught_cond = eventfd(0, EFD_CLOEXEC);
     if (pthread_mutex_init(&in_signal_lock, NULL) != 0 ||
-        pthread_cond_init(&exit_signal_cond, NULL) != 0 ||
-        pthread_cond_init(&signal_caught_cond, NULL) != 0) {
+            exit_signal_cond == -1 ||
+            signal_caught_cond == -1) {
         jl_error("SIGUSR pthread init failed");
     }
 #endif
@@ -1039,8 +1081,9 @@ void restore_signals(void)
 static void fpe_handler(int sig, siginfo_t *info, void *context)
 {
     (void)info;
-    if (jl_get_safe_restore()) { // restarting jl_ or profile
-        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        jl_longjmp_in_ctx(sig, context, *saferestore);
         return;
     }
     jl_task_t *ct = jl_get_current_task();
@@ -1050,6 +1093,21 @@ static void fpe_handler(int sig, siginfo_t *info, void *context)
         jl_throw_in_ctx(ct, jl_diverror_exception, sig, context);
 }
 
+static void jl_longjmp_in_ctx(int sig, void *_ctx, jl_jmp_buf jmpbuf)
+{
+#if defined(_OS_DARWIN_)
+    jl_longjmp_in_state((host_thread_state_t*)jl_to_bt_context(_ctx), jmpbuf);
+#else
+    if (jl_simulate_longjmp(jmpbuf, jl_to_bt_context(_ctx)))
+        return;
+    sigset_t sset;
+    sigemptyset(&sset);
+    sigaddset(&sset, sig);
+    pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
+    jl_longjmp(jmpbuf, 1);
+#endif
+}
+
 static void sigint_handler(int sig)
 {
     jl_sigint_passed = 1;
diff --git a/src/signals-win.c b/src/signals-win.c
index 4c31df766f3f1..b5f8dd8bd79d9 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -86,9 +86,13 @@ void __cdecl crt_sig_handler(int sig, int num)
         }
         break;
     default: // SIGSEGV, SIGTERM, SIGILL, SIGABRT
-        if (sig == SIGSEGV && jl_get_safe_restore()) {
-            signal(sig, (void (__cdecl *)(int))crt_sig_handler);
-            jl_sig_throw();
+        if (sig == SIGSEGV) { // restarting jl_ or profile
+            jl_jmp_buf *saferestore = jl_get_safe_restore();
+            if (saferestore) {
+                signal(sig, (void (__cdecl *)(int))crt_sig_handler);
+                jl_longjmp(*saferestore, 1);
+                return;
+            }
         }
         memset(&Context, 0, sizeof(Context));
         RtlCaptureContext(&Context);
@@ -109,6 +113,8 @@ static jl_ptls_t stkerror_ptls;
 static int have_backtrace_fiber;
 static void JL_NORETURN start_backtrace_fiber(void)
 {
+    // print the warning (this mysteriously needs a lot of stack for the WriteFile syscall)
+    stack_overflow_warning();
     // collect the backtrace
     stkerror_ptls->bt_size =
         rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx,
@@ -124,41 +130,41 @@ void restore_signals(void)
     SetConsoleCtrlHandler(NULL, 0);
 }
 
-void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *excpt, PCONTEXT ctxThread)
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c);
+
+static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *excpt, PCONTEXT ctxThread)
 {
-#if defined(_CPU_X86_64_)
-    DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8;
-#elif defined(_CPU_X86_)
-    DWORD32 Esp = (ctxThread->Esp & (DWORD32)-16) - 4;
-#else
-#error WIN16 not supported :P
-#endif
-    if (ct && !jl_get_safe_restore()) {
-        assert(excpt != NULL);
-        jl_ptls_t ptls = ct->ptls;
-        ptls->bt_size = 0;
-        if (excpt != jl_stackovf_exception) {
-            ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
-                                              ct->gcstack);
-        }
-        else if (have_backtrace_fiber) {
-            uv_mutex_lock(&backtrace_lock);
-            stkerror_ctx = ctxThread;
-            stkerror_ptls = ptls;
-            jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
-            uv_mutex_unlock(&backtrace_lock);
-        }
-        ptls->sig_exception = excpt;
+    jl_jmp_buf *saferestore = jl_get_safe_restore();
+    if (saferestore) { // restarting jl_ or profile
+        if (!jl_simulate_longjmp(*saferestore, ctxThread))
+            abort();
+        return;
+    }
+    assert(ct && excpt);
+    jl_ptls_t ptls = ct->ptls;
+    ptls->bt_size = 0;
+    if (excpt != jl_stackovf_exception) {
+        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
+                                          ct->gcstack);
+    }
+    else if (have_backtrace_fiber) {
+        uv_mutex_lock(&backtrace_lock);
+        stkerror_ctx = ctxThread;
+        stkerror_ptls = ptls;
+        jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
+        uv_mutex_unlock(&backtrace_lock);
+    }
+    ptls->sig_exception = excpt;
+    ptls->io_wait = 0;
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        if (!jl_simulate_longjmp(eh->eh_ctx, ctxThread))
+            abort();
+    }
+    else {
+        jl_no_exc_handler(excpt, ct);
     }
-#if defined(_CPU_X86_64_)
-    *(DWORD64*)Rsp = 0;
-    ctxThread->Rsp = Rsp;
-    ctxThread->Rip = (DWORD64)&jl_sig_throw;
-#elif defined(_CPU_X86_)
-    *(DWORD32*)Esp = 0;
-    ctxThread->Esp = Esp;
-    ctxThread->Eip = (DWORD)&jl_sig_throw;
-#endif
 }
 
 HANDLE hMainThread = INVALID_HANDLE_VALUE;
@@ -244,7 +250,6 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
         case EXCEPTION_STACK_OVERFLOW:
             if (ct->eh != NULL) {
                 ptls->needs_resetstkoflw = 1;
-                stack_overflow_warning();
                 jl_throw_in_ctx(ct, jl_stackovf_exception, ExceptionInfo->ContextRecord);
                 return EXCEPTION_CONTINUE_EXECUTION;
             }
@@ -421,16 +426,16 @@ static DWORD WINAPI profile_bt( LPVOID lparam )
 
                 jl_ptls_t ptls = jl_atomic_load_relaxed(&jl_all_tls_states)[0]; // given only profiling hMainThread
 
-                // store threadid but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_THREADID store threadid but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
 
-                // store task id (never null)
+                // META_OFFSET_TASKID store task id (never null)
                 bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
 
-                // store cpu cycle clock
+                // META_OFFSET_CPUCYCLECLOCK store cpu cycle clock
                 bt_data_prof[bt_size_cur++].uintptr = cycleclock();
 
-                // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                // META_OFFSET_SLEEPSTATE store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
                 bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
 
                 // Mark the end of this block with two 0's
diff --git a/src/stackwalk.c b/src/stackwalk.c
index 3dcb310c14d51..0988d7a833c94 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -5,6 +5,7 @@
   utilities for walking the stack and looking up information about code addresses
 */
 #include <inttypes.h>
+#include "gc-common.h"
 #include "julia.h"
 #include "julia_internal.h"
 #include "threading.h"
@@ -82,7 +83,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
         skip--;
     }
 #endif
-#if !defined(_OS_WINDOWS_)
+#if !defined(_OS_WINDOWS_) // no point on windows, since RtlVirtualUnwind won't give us a second chance if the segfault happens in ntdll
     jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
     jl_set_safe_restore(&buf);
@@ -918,16 +919,280 @@ _os_ptr_munge(uintptr_t ptr) JL_NOTSAFEPOINT
 #endif
 
 
-extern bt_context_t *jl_to_bt_context(void *sigctx);
+extern bt_context_t *jl_to_bt_context(void *sigctx) JL_NOTSAFEPOINT;
+
+// Some notes: this simulates a longjmp call occurring in context `c`, as if the
+// user was to set the PC in `c` to call longjmp and the PC in the longjmp to
+// return here. This helps work around many cases where siglongjmp out of a
+// signal handler is not supported (e.g. missing a _sigunaltstack call).
+// Additionally note that this doesn't restore the MXCSR or FP control word
+// (which some, but not most longjmp implementations do).  It also doesn't
+// support shadow stacks, so if those are in use, you might need to use a direct
+// jl_longjmp instead to leave the signal frame instead of relying on simulating
+// it and attempting to return normally.
+int jl_simulate_longjmp(jl_jmp_buf mctx, bt_context_t *c) JL_NOTSAFEPOINT
+{
+#if (defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_))
+    https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+    return 0;
+#elif defined(_OS_WINDOWS_)
+    _JUMP_BUFFER* _ctx = (_JUMP_BUFFER*)mctx;
+    #if defined(_CPU_X86_64_)
+    c->Rbx = _ctx->Rbx;
+    c->Rsp = _ctx->Rsp;
+    c->Rbp = _ctx->Rbp;
+    c->Rsi = _ctx->Rsi;
+    c->Rdi = _ctx->Rdi;
+    c->R12 = _ctx->R12;
+    c->R13 = _ctx->R13;
+    c->R14 = _ctx->R14;
+    c->R15 = _ctx->R15;
+    c->Rip = _ctx->Rip;
+    memcpy(&c->Xmm6, &_ctx->Xmm6, 10 * sizeof(_ctx->Xmm6)); // Xmm6-Xmm15
+    // c->MxCsr = _ctx->MxCsr;
+    // c->FloatSave.ControlWord = _ctx->FpCsr;
+    // c->SegGS[0] = _ctx->Frame;
+    c->Rax = 1;
+    c->Rsp += sizeof(void*);
+    assert(c->Rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_X86_)
+    c->Ebp = _ctx->Ebp;
+    c->Ebx = _ctx->Ebx;
+    c->Edi = _ctx->Edi;
+    c->Esi = _ctx->Esi;
+    c->Esp = _ctx->Esp;
+    c->Eip = _ctx->Eip;
+    // c->SegFS[0] = _ctx->Registration;
+    // c->FloatSave.ControlWord = _ctx->FpCsr;
+    c->Eax = 1;
+    c->Esp += sizeof(void*);
+    assert(c->Esp % 16 == 0);
+    return 1;
+    #else
+    #error Windows is currently only supported on x86 and x86_64
+    #endif
+#elif defined(_OS_LINUX_) && defined(__GLIBC__)
+    __jmp_buf *_ctx = &mctx->__jmpbuf;
+    mcontext_t *mc = &c->uc_mcontext;
+    #if defined(_CPU_X86_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s
+    mc->gregs[REG_EBX] = (*_ctx)[0];
+    mc->gregs[REG_ESI] = (*_ctx)[1];
+    mc->gregs[REG_EDI] = (*_ctx)[2];
+    mc->gregs[REG_EBP] = (*_ctx)[3];
+    mc->gregs[REG_ESP] = (*_ctx)[4];
+    mc->gregs[REG_EIP] = (*_ctx)[5];
+    // ifdef PTR_DEMANGLE ?
+    mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]);
+    mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]);
+    mc->gregs[REG_EAX] = 1;
+    assert(mc->gregs[REG_ESP] % 16 == 0);
+    return 1;
+    #elif defined(_CPU_X86_64_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s
+    mc->gregs[REG_RBX] = (*_ctx)[0];
+    mc->gregs[REG_RBP] = (*_ctx)[1];
+    mc->gregs[REG_R12] = (*_ctx)[2];
+    mc->gregs[REG_R13] = (*_ctx)[3];
+    mc->gregs[REG_R14] = (*_ctx)[4];
+    mc->gregs[REG_R15] = (*_ctx)[5];
+    mc->gregs[REG_RSP] = (*_ctx)[6];
+    mc->gregs[REG_RIP] = (*_ctx)[7];
+    // ifdef PTR_DEMANGLE ?
+    mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]);
+    mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]);
+    mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]);
+    mc->gregs[REG_RAX] = 1;
+    assert(mc->gregs[REG_RSP] % 16 == 0);
+    return 1;
+    #elif defined(_CPU_ARM_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S
+    mc->arm_sp = (*_ctx)[0];
+    mc->arm_lr = (*_ctx)[1];
+    mc->arm_r4 = (*_ctx)[2]; // aka v1
+    mc->arm_r5 = (*_ctx)[3]; // aka v2
+    mc->arm_r6 = (*_ctx)[4]; // aka v3
+    mc->arm_r7 = (*_ctx)[5]; // aka v4
+    mc->arm_r8 = (*_ctx)[6]; // aka v5
+    mc->arm_r9 = (*_ctx)[7]; // aka v6 aka sb
+    mc->arm_r10 = (*_ctx)[8]; // aka v7 aka sl
+    mc->arm_fp = (*_ctx)[10]; // aka v8 aka r11
+    // ifdef PTR_DEMANGLE ?
+    mc->arm_sp = ptr_demangle(mc->arm_sp);
+    mc->arm_lr = ptr_demangle(mc->arm_lr);
+    mc->arm_pc = mc->arm_lr;
+    mc->arm_r0 = 1;
+    assert(mc->arm_sp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s
+    // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62
+    unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved;
+    mc->regs[19] = (*_ctx)[0];
+    mc->regs[20] = (*_ctx)[1];
+    mc->regs[21] = (*_ctx)[2];
+    mc->regs[22] = (*_ctx)[3];
+    mc->regs[23] = (*_ctx)[4];
+    mc->regs[24] = (*_ctx)[5];
+    mc->regs[25] = (*_ctx)[6];
+    mc->regs[26] = (*_ctx)[7];
+    mc->regs[27] = (*_ctx)[8];
+    mc->regs[28] = (*_ctx)[9];
+    mc->regs[29] = (*_ctx)[10]; // aka fp
+    mc->regs[30] = (*_ctx)[11]; // aka lr
+    // Yes, they did skip 12 why writing the code originally; and, no, I do not know why.
+    mc->sp = (*_ctx)[13];
+    mcfp->vregs[7] = (*_ctx)[14]; // aka d8
+    mcfp->vregs[8] = (*_ctx)[15]; // aka d9
+    mcfp->vregs[9] = (*_ctx)[16]; // aka d10
+    mcfp->vregs[10] = (*_ctx)[17]; // aka d11
+    mcfp->vregs[11] = (*_ctx)[18]; // aka d12
+    mcfp->vregs[12] = (*_ctx)[19]; // aka d13
+    mcfp->vregs[13] = (*_ctx)[20]; // aka d14
+    mcfp->vregs[14] = (*_ctx)[21]; // aka d15
+    // ifdef PTR_DEMANGLE ?
+    mc->sp = ptr_demangle(mc->sp);
+    mc->regs[30] = ptr_demangle(mc->regs[30]);
+    mc->pc = mc->regs[30];
+    mc->regs[0] = 1;
+    assert(mc->sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown linux")
+    (void)mc;
+    (void)mctx;
+    return 0;
+    #endif
+#elif defined(_OS_DARWIN_)
+    #if defined(_CPU_X86_64_)
+    // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s
+    x86_thread_state64_t *mc = (x86_thread_state64_t*)c;
+    mc->__rbx = ((uint64_t*)mctx)[0];
+    mc->__rbp = ((uint64_t*)mctx)[1];
+    mc->__rsp = ((uint64_t*)mctx)[2];
+    mc->__r12 = ((uint64_t*)mctx)[3];
+    mc->__r13 = ((uint64_t*)mctx)[4];
+    mc->__r14 = ((uint64_t*)mctx)[5];
+    mc->__r15 = ((uint64_t*)mctx)[6];
+    mc->__rip = ((uint64_t*)mctx)[7];
+    // added in libsystem_platform 177.200.16 (macOS Mojave 10.14.3)
+    // prior to that _os_ptr_munge_token was (hopefully) typically 0,
+    // so x ^ 0 == x and this is a no-op
+    mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp);
+    mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp);
+    mc->__rip = _OS_PTR_UNMUNGE(mc->__rip);
+    mc->__rax = 1;
+    assert(mc->__rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
+    // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s
+    // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h
+    // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64
+    arm_thread_state64_t *mc = (arm_thread_state64_t*)c;
+    mc->__x[19] = ((uint64_t*)mctx)[0];
+    mc->__x[20] = ((uint64_t*)mctx)[1];
+    mc->__x[21] = ((uint64_t*)mctx)[2];
+    mc->__x[22] = ((uint64_t*)mctx)[3];
+    mc->__x[23] = ((uint64_t*)mctx)[4];
+    mc->__x[24] = ((uint64_t*)mctx)[5];
+    mc->__x[25] = ((uint64_t*)mctx)[6];
+    mc->__x[26] = ((uint64_t*)mctx)[7];
+    mc->__x[27] = ((uint64_t*)mctx)[8];
+    mc->__x[28] = ((uint64_t*)mctx)[9];
+    mc->__x[10] = ((uint64_t*)mctx)[10];
+    mc->__x[11] = ((uint64_t*)mctx)[11];
+    mc->__x[12] = ((uint64_t*)mctx)[12];
+    // 13 is reserved/unused
+    double *mcfp = (double*)&mc[1];
+    mcfp[7] = ((uint64_t*)mctx)[14]; // aka d8
+    mcfp[8] = ((uint64_t*)mctx)[15]; // aka d9
+    mcfp[9] = ((uint64_t*)mctx)[16]; // aka d10
+    mcfp[10] = ((uint64_t*)mctx)[17]; // aka d11
+    mcfp[11] = ((uint64_t*)mctx)[18]; // aka d12
+    mcfp[12] = ((uint64_t*)mctx)[19]; // aka d13
+    mcfp[13] = ((uint64_t*)mctx)[20]; // aka d14
+    mcfp[14] = ((uint64_t*)mctx)[21]; // aka d15
+    mc->__fp = _OS_PTR_UNMUNGE(mc->__x[10]);
+    mc->__lr = _OS_PTR_UNMUNGE(mc->__x[11]);
+    mc->__x[12] = _OS_PTR_UNMUNGE(mc->__x[12]);
+    mc->__sp = mc->__x[12];
+    // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either
+    mc->__pc = ptrauth_strip(mc->__lr, 0);
+    mc->__pad = 0; // aka __ra_sign_state = not signed
+    mc->__x[0] = 1;
+    assert(mc->__sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown darwin")
+    (void)mctx;
+    return 0;
+#endif
+#elif defined(_OS_FREEBSD_)
+    mcontext_t *mc = &c->uc_mcontext;
+    #if defined(_CPU_X86_64_)
+    // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S
+    mc->mc_rip = ((long*)mctx)[0];
+    mc->mc_rbx = ((long*)mctx)[1];
+    mc->mc_rsp = ((long*)mctx)[2];
+    mc->mc_rbp = ((long*)mctx)[3];
+    mc->mc_r12 = ((long*)mctx)[4];
+    mc->mc_r13 = ((long*)mctx)[5];
+    mc->mc_r14 = ((long*)mctx)[6];
+    mc->mc_r15 = ((long*)mctx)[7];
+    mc->mc_rax = 1;
+    mc->mc_rsp += sizeof(void*);
+    assert(mc->mc_rsp % 16 == 0);
+    return 1;
+    #elif defined(_CPU_AARCH64_)
+    mc->mc_gpregs.gp_x[19] = ((long*)mctx)[0];
+    mc->mc_gpregs.gp_x[20] = ((long*)mctx)[1];
+    mc->mc_gpregs.gp_x[21] = ((long*)mctx)[2];
+    mc->mc_gpregs.gp_x[22] = ((long*)mctx)[3];
+    mc->mc_gpregs.gp_x[23] = ((long*)mctx)[4];
+    mc->mc_gpregs.gp_x[24] = ((long*)mctx)[5];
+    mc->mc_gpregs.gp_x[25] = ((long*)mctx)[6];
+    mc->mc_gpregs.gp_x[26] = ((long*)mctx)[7];
+    mc->mc_gpregs.gp_x[27] = ((long*)mctx)[8];
+    mc->mc_gpregs.gp_x[28] = ((long*)mctx)[9];
+    mc->mc_gpregs.gp_x[29] = ((long*)mctx)[10];
+    mc->mc_gpregs.gp_lr = ((long*)mctx)[11];
+    mc->mc_gpregs.gp_sp = ((long*)mctx)[12];
+    mc->mc_fpregs.fp_q[7] = ((long*)mctx)[13];
+    mc->mc_fpregs.fp_q[8] = ((long*)mctx)[14];
+    mc->mc_fpregs.fp_q[9] = ((long*)mctx)[15];
+    mc->mc_fpregs.fp_q[10] = ((long*)mctx)[16];
+    mc->mc_fpregs.fp_q[11] = ((long*)mctx)[17];
+    mc->mc_fpregs.fp_q[12] = ((long*)mctx)[18];
+    mc->mc_fpregs.fp_q[13] = ((long*)mctx)[19];
+    mc->mc_fpregs.fp_q[14] = ((long*)mctx)[20];
+    mc->mc_gpregs.gp_x[0] = 1;
+    assert(mc->mc_gpregs.gp_sp % 16 == 0);
+    return 1;
+    #else
+    #pragma message("jl_record_backtrace not defined for ASM/SETJMP on unknown freebsd")
+    (void)mctx;
+    return 0;
+    #endif
+#else
+return 0;
+#endif
+}
 
-static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
+JL_DLLEXPORT size_t jl_record_backtrace(jl_task_t *t, jl_bt_element_t *bt_data, size_t max_bt_size) JL_NOTSAFEPOINT
 {
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
-    ptls->bt_size = 0;
     if (t == ct) {
-        ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
-        return;
+        return rec_backtrace(bt_data, max_bt_size, 0);
     }
     bt_context_t *context = NULL;
     bt_context_t c;
@@ -935,9 +1200,11 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
     while (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid) {
         int lockret = jl_lock_stackwalk();
         // if this task is already running somewhere, we need to stop the thread it is running on and query its state
-        if (!jl_thread_suspend_and_get_state(old, 0, &c)) {
+        if (!jl_thread_suspend_and_get_state(old, 1, &c)) {
             jl_unlock_stackwalk(lockret);
-            return;
+            if (jl_atomic_load_relaxed(&t->tid) != old)
+                continue;
+            return 0;
         }
         jl_unlock_stackwalk(lockret);
         if (jl_atomic_load_relaxed(&t->tid) == old) {
@@ -952,217 +1219,33 @@ static void jl_rec_backtrace(jl_task_t *t) JL_NOTSAFEPOINT
         // got the wrong thread stopped, try again
         jl_thread_resume(old);
     }
-    if (context == NULL && (!t->copy_stack && t->started && t->stkbuf != NULL)) {
+    if (context == NULL && (!t->ctx.copy_stack && t->ctx.started && t->ctx.ctx != NULL)) {
         // need to read the context from the task stored state
+        jl_jmp_buf *mctx = &t->ctx.ctx->uc_mcontext;
 #if defined(_OS_WINDOWS_)
         memset(&c, 0, sizeof(c));
-        _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext;
-#if defined(_CPU_X86_64_)
-        c.Rbx = mctx->Rbx;
-        c.Rsp = mctx->Rsp;
-        c.Rbp = mctx->Rbp;
-        c.Rsi = mctx->Rsi;
-        c.Rdi = mctx->Rdi;
-        c.R12 = mctx->R12;
-        c.R13 = mctx->R13;
-        c.R14 = mctx->R14;
-        c.R15 = mctx->R15;
-        c.Rip = mctx->Rip;
-        memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15
-#elif defined(_CPU_X86_)
-        c.Eip = mctx->Eip;
-        c.Esp = mctx->Esp;
-        c.Ebp = mctx->Ebp;
-#else
-        #error Windows is currently only supported on x86 and x86_64
-#endif
-        context = &c;
+        if (jl_simulate_longjmp(*mctx, &c))
+            context = &c;
 #elif defined(JL_HAVE_UNW_CONTEXT)
-        context = &t->ctx.ctx;
+        context = t->ctx.ctx;
 #elif defined(JL_HAVE_UCONTEXT)
-        context = jl_to_bt_context(&t->ctx.ctx);
+        context = jl_to_bt_context(t->ctx.ctx);
 #elif defined(JL_HAVE_ASM)
         memset(&c, 0, sizeof(c));
-     #if defined(_OS_LINUX_) && defined(__GLIBC__)
-        __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf;
-        mcontext_t *mc = &c.uc_mcontext;
-      #if defined(_CPU_X86_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s
-        mc->gregs[REG_EBX] = (*mctx)[0];
-        mc->gregs[REG_ESI] = (*mctx)[1];
-        mc->gregs[REG_EDI] = (*mctx)[2];
-        mc->gregs[REG_EBP] = (*mctx)[3];
-        mc->gregs[REG_ESP] = (*mctx)[4];
-        mc->gregs[REG_EIP] = (*mctx)[5];
-        // ifdef PTR_DEMANGLE ?
-        mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]);
-        mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]);
-        context = &c;
-      #elif defined(_CPU_X86_64_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s
-        mc->gregs[REG_RBX] = (*mctx)[0];
-        mc->gregs[REG_RBP] = (*mctx)[1];
-        mc->gregs[REG_R12] = (*mctx)[2];
-        mc->gregs[REG_R13] = (*mctx)[3];
-        mc->gregs[REG_R14] = (*mctx)[4];
-        mc->gregs[REG_R15] = (*mctx)[5];
-        mc->gregs[REG_RSP] = (*mctx)[6];
-        mc->gregs[REG_RIP] = (*mctx)[7];
-        // ifdef PTR_DEMANGLE ?
-        mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]);
-        mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]);
-        mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]);
-        context = &c;
-      #elif defined(_CPU_ARM_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S
-        mc->arm_sp = (*mctx)[0];
-        mc->arm_lr = (*mctx)[1];
-        mc->arm_r4 = (*mctx)[2]; // aka v1
-        mc->arm_r5 = (*mctx)[3]; // aka v2
-        mc->arm_r6 = (*mctx)[4]; // aka v3
-        mc->arm_r7 = (*mctx)[5]; // aka v4
-        mc->arm_r8 = (*mctx)[6]; // aka v5
-        mc->arm_r9 = (*mctx)[7]; // aka v6 aka sb
-        mc->arm_r10 = (*mctx)[8]; // aka v7 aka sl
-        mc->arm_fp = (*mctx)[10]; // aka v8 aka r11
-        // ifdef PTR_DEMANGLE ?
-        mc->arm_sp = ptr_demangle(mc->arm_sp);
-        mc->arm_lr = ptr_demangle(mc->arm_lr);
-        mc->arm_pc = mc->arm_lr;
-        context = &c;
-      #elif defined(_CPU_AARCH64_)
-        // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S
-        // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h
-        // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s
-        // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62
-        unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved;
-        mc->regs[19] = (*mctx)[0];
-        mc->regs[20] = (*mctx)[1];
-        mc->regs[21] = (*mctx)[2];
-        mc->regs[22] = (*mctx)[3];
-        mc->regs[23] = (*mctx)[4];
-        mc->regs[24] = (*mctx)[5];
-        mc->regs[25] = (*mctx)[6];
-        mc->regs[26] = (*mctx)[7];
-        mc->regs[27] = (*mctx)[8];
-        mc->regs[28] = (*mctx)[9];
-        mc->regs[29] = (*mctx)[10]; // aka fp
-        mc->regs[30] = (*mctx)[11]; // aka lr
-        // Yes, they did skip 12 why writing the code originally; and, no, I do not know why.
-        mc->sp = (*mctx)[13];
-        mcfp->vregs[7] = (*mctx)[14]; // aka d8
-        mcfp->vregs[8] = (*mctx)[15]; // aka d9
-        mcfp->vregs[9] = (*mctx)[16]; // aka d10
-        mcfp->vregs[10] = (*mctx)[17]; // aka d11
-        mcfp->vregs[11] = (*mctx)[18]; // aka d12
-        mcfp->vregs[12] = (*mctx)[19]; // aka d13
-        mcfp->vregs[13] = (*mctx)[20]; // aka d14
-        mcfp->vregs[14] = (*mctx)[21]; // aka d15
-        // ifdef PTR_DEMANGLE ?
-        mc->sp = ptr_demangle(mc->sp);
-        mc->regs[30] = ptr_demangle(mc->regs[30]);
-        mc->pc = mc->regs[30];
-        context = &c;
-      #else
-       #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux")
-       (void)mc;
-       (void)c;
-       (void)mctx;
-      #endif
-     #elif defined(_OS_DARWIN_)
-        sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
-      #if defined(_CPU_X86_64_)
-        // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s
-        x86_thread_state64_t *mc = (x86_thread_state64_t*)&c;
-        mc->__rbx = ((uint64_t*)mctx)[0];
-        mc->__rbp = ((uint64_t*)mctx)[1];
-        mc->__rsp = ((uint64_t*)mctx)[2];
-        mc->__r12 = ((uint64_t*)mctx)[3];
-        mc->__r13 = ((uint64_t*)mctx)[4];
-        mc->__r14 = ((uint64_t*)mctx)[5];
-        mc->__r15 = ((uint64_t*)mctx)[6];
-        mc->__rip = ((uint64_t*)mctx)[7];
-        // added in libsystem_platform 177.200.16 (macOS Mojave 10.14.3)
-        // prior to that _os_ptr_munge_token was (hopefully) typically 0,
-        // so x ^ 0 == x and this is a no-op
-        mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp);
-        mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp);
-        mc->__rip = _OS_PTR_UNMUNGE(mc->__rip);
-        context = &c;
-      #elif defined(_CPU_AARCH64_)
-        // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s
-        // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h
-        // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64
-        arm_thread_state64_t *mc = (arm_thread_state64_t*)&c;
-        mc->__x[19] = ((uint64_t*)mctx)[0];
-        mc->__x[20] = ((uint64_t*)mctx)[1];
-        mc->__x[21] = ((uint64_t*)mctx)[2];
-        mc->__x[22] = ((uint64_t*)mctx)[3];
-        mc->__x[23] = ((uint64_t*)mctx)[4];
-        mc->__x[24] = ((uint64_t*)mctx)[5];
-        mc->__x[25] = ((uint64_t*)mctx)[6];
-        mc->__x[26] = ((uint64_t*)mctx)[7];
-        mc->__x[27] = ((uint64_t*)mctx)[8];
-        mc->__x[28] = ((uint64_t*)mctx)[9];
-        mc->__x[10] = ((uint64_t*)mctx)[10];
-        mc->__x[11] = ((uint64_t*)mctx)[11];
-        mc->__x[12] = ((uint64_t*)mctx)[12];
-        // 13 is reserved/unused
-        double *mcfp = (double*)&mc[1];
-        mcfp[7] = ((uint64_t*)mctx)[14]; // aka d8
-        mcfp[8] = ((uint64_t*)mctx)[15]; // aka d9
-        mcfp[9] = ((uint64_t*)mctx)[16]; // aka d10
-        mcfp[10] = ((uint64_t*)mctx)[17]; // aka d11
-        mcfp[11] = ((uint64_t*)mctx)[18]; // aka d12
-        mcfp[12] = ((uint64_t*)mctx)[19]; // aka d13
-        mcfp[13] = ((uint64_t*)mctx)[20]; // aka d14
-        mcfp[14] = ((uint64_t*)mctx)[21]; // aka d15
-        mc->__fp = _OS_PTR_UNMUNGE(mc->__x[10]);
-        mc->__lr = _OS_PTR_UNMUNGE(mc->__x[11]);
-        mc->__x[12] = _OS_PTR_UNMUNGE(mc->__x[12]);
-        mc->__sp = mc->__x[12];
-        // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either
-        mc->__pc = ptrauth_strip(mc->__lr, 0);
-        mc->__pad = 0; // aka __ra_sign_state = not signed
-        context = &c;
-      #else
-       #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin")
-        (void)mctx;
-        (void)c;
-      #endif
-     #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
-        sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
-        mcontext_t *mc = &c.uc_mcontext;
-        // https://github.com/freebsd/freebsd-src/blob/releng/13.1/lib/libc/amd64/gen/_setjmp.S
-        mc->mc_rip = ((long*)mctx)[0];
-        mc->mc_rbx = ((long*)mctx)[1];
-        mc->mc_rsp = ((long*)mctx)[2];
-        mc->mc_rbp = ((long*)mctx)[3];
-        mc->mc_r12 = ((long*)mctx)[4];
-        mc->mc_r13 = ((long*)mctx)[5];
-        mc->mc_r14 = ((long*)mctx)[6];
-        mc->mc_r15 = ((long*)mctx)[7];
-        context = &c;
-     #else
-      #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system")
-      (void)c;
-     #endif
+        if (jl_simulate_longjmp(*mctx, &c))
+            context = &c;
 #else
-     #pragma message("jl_rec_backtrace not defined for unknown task system")
+     #pragma message("jl_record_backtrace not defined for unknown task system")
 #endif
     }
+    size_t bt_size = 0;
     if (context)
-        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context,  t->gcstack);
+        bt_size = rec_backtrace_ctx(bt_data, max_bt_size, context, t->gcstack);
     if (old == -1)
         jl_atomic_store_relaxed(&t->tid, old);
     else if (old != ptls->tid)
         jl_thread_resume(old);
+    return bt_size;
 }
 
 //--------------------------------------------------
@@ -1194,12 +1277,15 @@ JL_DLLEXPORT void jlbacktracet(jl_task_t *t) JL_NOTSAFEPOINT
 {
     jl_task_t *ct = jl_current_task;
     jl_ptls_t ptls = ct->ptls;
-    jl_rec_backtrace(t);
-    size_t i, bt_size = ptls->bt_size;
+    ptls->bt_size = 0;
     jl_bt_element_t *bt_data = ptls->bt_data;
+    size_t bt_size = jl_record_backtrace(t, bt_data, JL_MAX_BT_SIZE);
+    size_t i;
     for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
         jl_print_bt_entry_codeloc(bt_data + i);
     }
+    if (bt_size == 0)
+        jl_safe_printf("      no backtrace recorded\n");
 }
 
 JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
@@ -1215,15 +1301,15 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
     size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
     jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
     for (size_t i = 0; i < nthreads; i++) {
-        // skip GC threads since they don't have tasks
-        if (gc_first_tid <= i && i < gc_first_tid + jl_n_gcthreads) {
+        jl_ptls_t ptls2 = allstates[i];
+        if (gc_is_collector_thread(i)) {
+            jl_safe_printf("==== Skipping backtrace for parallel/concurrent GC thread %zu\n", i + 1);
             continue;
         }
-        jl_ptls_t ptls2 = allstates[i];
         if (ptls2 == NULL) {
             continue;
         }
-        small_arraylist_t *live_tasks = &ptls2->gc_tls.heap.live_tasks;
+        small_arraylist_t *live_tasks = &ptls2->gc_tls_common.heap.live_tasks;
         size_t n = mtarraylist_length(live_tasks);
         int t_state = JL_TASK_STATE_DONE;
         jl_task_t *t = ptls2->root_task;
@@ -1235,14 +1321,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
             jl_safe_printf("     ---- Root task (%p)\n", ptls2->root_task);
             if (t != NULL) {
                 jl_safe_printf("          (sticky: %d, started: %d, state: %d, tid: %d)\n",
-                        t->sticky, t->started, t_state,
+                        t->sticky, t->ctx.started, t_state,
                         jl_atomic_load_relaxed(&t->tid) + 1);
-                if (t->stkbuf != NULL) {
-                    jlbacktracet(t);
-                }
-                else {
-                    jl_safe_printf("      no stack\n");
-                }
+                jlbacktracet(t);
             }
             jl_safe_printf("     ---- End root task\n");
         }
@@ -1257,12 +1338,9 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
             jl_safe_printf("     ---- Task %zu (%p)\n", j + 1, t);
             // n.b. this information might not be consistent with the stack printing after it, since it could start running or change tid, etc.
             jl_safe_printf("          (sticky: %d, started: %d, state: %d, tid: %d)\n",
-                    t->sticky, t->started, t_state,
+                    t->sticky, t->ctx.started, t_state,
                     jl_atomic_load_relaxed(&t->tid) + 1);
-            if (t->stkbuf != NULL)
-                jlbacktracet(t);
-            else
-                jl_safe_printf("      no stack\n");
+            jlbacktracet(t);
             jl_safe_printf("     ---- End task %zu\n", j + 1);
         }
         jl_safe_printf("==== End thread %d\n", ptls2->tid + 1);
diff --git a/src/staticdata.c b/src/staticdata.c
index a8b45534f793b..af24a84f39854 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -100,7 +100,7 @@ extern "C" {
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
-#define NUM_TAGS    191
+#define NUM_TAGS    193
 
 // An array of references that need to be restored from the sysimg
 // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C.
@@ -122,6 +122,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_array_type);
         INSERT_TAG(jl_expr_type);
         INSERT_TAG(jl_binding_type);
+        INSERT_TAG(jl_binding_partition_type);
         INSERT_TAG(jl_globalref_type);
         INSERT_TAG(jl_string_type);
         INSERT_TAG(jl_module_type);
@@ -311,6 +312,7 @@ jl_value_t **const*const get_tags(void) {
         INSERT_TAG(jl_builtin_modifyglobal);
         INSERT_TAG(jl_builtin_replaceglobal);
         INSERT_TAG(jl_builtin_setglobalonce);
+        INSERT_TAG(jl_builtin_current_scope);
         // n.b. must update NUM_TAGS when you add something here
 #undef INSERT_TAG
         assert(i == NUM_TAGS - 1);
@@ -349,6 +351,21 @@ arraylist_t eytzinger_idxs;
 static uintptr_t img_min;
 static uintptr_t img_max;
 
+// HT_NOTFOUND is a valid integer ID, so we store the integer ids mangled.
+// This pair of functions mangles/demanges
+static size_t from_seroder_entry(void *entry)
+{
+    return (size_t)((char*)entry - (char*)HT_NOTFOUND - 1);
+}
+
+static void *to_seroder_entry(size_t idx)
+{
+    return (void*)((char*)HT_NOTFOUND + 1 + idx);
+}
+
+static htable_t new_methtables;
+static size_t precompilation_world;
+
 static int ptr_cmp(const void *l, const void *r)
 {
     uintptr_t left = *(const uintptr_t*)l;
@@ -484,7 +501,7 @@ static htable_t relocatable_ext_cis;
 // (reverse of fptr_to_id)
 // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C.
 static const jl_fptr_args_t id_to_fptrs[] = {
-    &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa,
+    &jl_f_throw, &jl_f_throw_methoderror, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa,
     &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure,
     &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined,
     &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call,
@@ -563,6 +580,8 @@ enum RefTags {
     ExternalLinkage     // reference to some other pkgimage
 };
 
+#define SYS_EXTERNAL_LINK_UNIT sizeof(void*)
+
 // calling conventions for internal entry points.
 // this is used to set the method-instance->invoke field
 typedef enum {
@@ -758,22 +777,41 @@ static uintptr_t jl_fptr_id(void *fptr)
 #define jl_queue_for_serialization(s, v) jl_queue_for_serialization_((s), (jl_value_t*)(v), 1, 0)
 static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED;
 
-
 static void jl_queue_module_for_serialization(jl_serializer_state *s, jl_module_t *m) JL_GC_DISABLED
 {
     jl_queue_for_serialization(s, m->name);
     jl_queue_for_serialization(s, m->parent);
-    jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings));
+    if (jl_options.trim) {
+        jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->bindings), 0, 1);
+    } else {
+        jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindings));
+    }
     jl_queue_for_serialization(s, jl_atomic_load_relaxed(&m->bindingkeyset));
-    if (jl_options.strip_metadata) {
+    if (jl_options.strip_metadata || jl_options.trim) {
         jl_svec_t *table = jl_atomic_load_relaxed(&m->bindings);
         for (size_t i = 0; i < jl_svec_len(table); i++) {
             jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
             if ((void*)b == jl_nothing)
                 break;
-            jl_sym_t *name = b->globalref->name;
-            if (name == jl_docmeta_sym && jl_atomic_load_relaxed(&b->value))
-                record_field_change((jl_value_t**)&b->value, jl_nothing);
+            if (jl_options.strip_metadata) {
+                jl_sym_t *name = b->globalref->name;
+                if (name == jl_docmeta_sym && jl_get_binding_value(b))
+                    record_field_change((jl_value_t**)&b->value, jl_nothing);
+            }
+            if (jl_options.trim) {
+                jl_value_t *val = jl_get_binding_value(b);
+                // keep binding objects that are defined and ...
+                if (val &&
+                    // ... point to modules ...
+                    (jl_is_module(val) ||
+                     // ... or point to __init__ methods ...
+                     !strcmp(jl_symbol_name(b->globalref->name), "__init__") ||
+                     // ... or point to Base functions accessed by the runtime
+                     (m == jl_base_module && (!strcmp(jl_symbol_name(b->globalref->name), "wait") ||
+                                              !strcmp(jl_symbol_name(b->globalref->name), "task_done_hook"))))) {
+                    jl_queue_for_serialization(s, b);
+                }
+            }
         }
     }
 
@@ -926,14 +964,34 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
     else if (jl_typetagis(v, jl_module_tag << 4)) {
         jl_queue_module_for_serialization(s, (jl_module_t*)v);
     }
+    else if (jl_is_binding_partition(v)) {
+        jl_binding_partition_t *bpart = (jl_binding_partition_t*)v;
+        jl_queue_for_serialization_(s, decode_restriction_value(jl_atomic_load_relaxed(&bpart->restriction)), 1, immediate);
+        jl_queue_for_serialization_(s, get_replaceable_field((jl_value_t**)&bpart->next, 0), 1, immediate);
+    }
     else if (layout->nfields > 0) {
+        if (jl_options.trim) {
+            if (jl_is_method(v)) {
+                jl_method_t *m = (jl_method_t *)v;
+                if (jl_is_svec(jl_atomic_load_relaxed(&m->specializations)))
+                    jl_queue_for_serialization_(s, (jl_value_t*)jl_atomic_load_relaxed(&m->specializations), 0, 1);
+            }
+            else if (jl_typetagis(v, jl_typename_type)) {
+                jl_typename_t *tn = (jl_typename_t*)v;
+                if (tn->mt != NULL && !tn->mt->frozen) {
+                    jl_methtable_t * new_methtable = (jl_methtable_t *)ptrhash_get(&new_methtables, tn->mt);
+                    if (new_methtable != HT_NOTFOUND)
+                        record_field_change((jl_value_t **)&tn->mt, (jl_value_t*)new_methtable);
+                    else
+                        record_field_change((jl_value_t **)&tn->mt, NULL);
+                }
+            }
+        }
         char *data = (char*)jl_data_ptr(v);
         size_t i, np = layout->npointers;
         for (i = 0; i < np; i++) {
             uint32_t ptr = jl_ptr_offset(t, i);
             int mutabl = t->name->mutabl;
-            if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field
-                mutabl = 0;
             jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl);
             jl_queue_for_serialization_(s, fld, 1, immediate);
         }
@@ -947,7 +1005,7 @@ done_fields: ;
     arraylist_push(&serialization_queue, (void*) v);
     size_t idx = serialization_queue.len - 1;
     assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");
-    *bp = (void*)((char*)HT_NOTFOUND + 1 + idx);
+    *bp = to_seroder_entry(idx);
 
     // DataType is very unusual, in that some of the fields need to be pre-order, and some
     // (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise
@@ -974,6 +1032,7 @@ done_fields: ;
     }
 }
 
+
 static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
 {
     if (!jl_needs_serialization(s, v))
@@ -1068,8 +1127,8 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
         // We found the sysimg/pkg that this item links against
         // Compute the relocation code
         size_t offset = (uintptr_t)v - (uintptr_t)jl_linkage_blobs.items[2*i];
-        offset /= sizeof(void*);
-        assert(offset < ((uintptr_t)1 << DEPS_IDX_OFFSET) && "offset to external image too large");
+        assert((offset % SYS_EXTERNAL_LINK_UNIT) == 0);
+        offset /= SYS_EXTERNAL_LINK_UNIT;
         assert(n_linkage_blobs() == jl_array_nrows(s->buildid_depmods_idxs));
         size_t depsidx = jl_array_data(s->buildid_depmods_idxs, uint32_t)[i]; // map from build_id_idx -> deps_idx
         assert(depsidx < INT32_MAX);
@@ -1081,6 +1140,7 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
         jl_array_grow_end(link_ids, 1);
         uint32_t *link_id_data  = jl_array_data(link_ids, uint32_t);  // wait until after the `grow`
         link_id_data[jl_array_nrows(link_ids) - 1] = depsidx;
+        assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "offset to external image too large");
         return ((uintptr_t)ExternalLinkage << RELOC_TAG_OFFSET) + offset;
     }
     return 0;
@@ -1093,19 +1153,19 @@ static uintptr_t add_external_linkage(jl_serializer_state *s, jl_value_t *v, jl_
 static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *link_ids) JL_NOTSAFEPOINT
 {
     assert(v != NULL && "cannot get backref to NULL object");
-    void *idx = HT_NOTFOUND;
     if (jl_is_symbol(v)) {
         void **pidx = ptrhash_bp(&symbol_table, v);
-        idx = *pidx;
+        void *idx = *pidx;
         if (idx == HT_NOTFOUND) {
             size_t l = strlen(jl_symbol_name((jl_sym_t*)v));
             write_uint32(s->symbols, l);
             ios_write(s->symbols, jl_symbol_name((jl_sym_t*)v), l + 1);
             size_t offset = ++nsym_tag;
             assert(offset < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many symbols");
-            idx = (void*)((char*)HT_NOTFOUND + ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + offset);
+            idx = to_seroder_entry(offset - 1);
             *pidx = idx;
         }
+        return ((uintptr_t)SymbolRef << RELOC_TAG_OFFSET) + from_seroder_entry(idx);
     }
     else if (v == (jl_value_t*)s->ptls->root_task) {
         return (uintptr_t)TagRef << RELOC_TAG_OFFSET;
@@ -1133,17 +1193,15 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v, jl_array_t *
         assert(item && "no external linkage identified");
         return item;
     }
+    void *idx = ptrhash_get(&serialization_order, v);
     if (idx == HT_NOTFOUND) {
-        idx = ptrhash_get(&serialization_order, v);
-        if (idx == HT_NOTFOUND) {
-            jl_(jl_typeof(v));
-            jl_(v);
-        }
-        assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass");
-        assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass");
-        assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass");
+        jl_(jl_typeof(v));
+        jl_(v);
     }
-    return (char*)idx - 1 - (char*)HT_NOTFOUND;
+    assert(idx != HT_NOTFOUND && "object missed during jl_queue_for_serialization pass");
+    assert(idx != (void*)(uintptr_t)-1 && "object missed during jl_insert_into_serialization_queue pass");
+    assert(idx != (void*)(uintptr_t)-2 && "object missed during jl_insert_into_serialization_queue pass");
+    return ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + from_seroder_entry(idx);
 }
 
 
@@ -1205,6 +1263,9 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
     jl_atomic_store_relaxed(&newm->bindingkeyset, NULL);
     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, bindingkeyset)));
     arraylist_push(&s->relocs_list, (void*)backref_id(s, jl_atomic_load_relaxed(&m->bindingkeyset), s->link_ids_relocs));
+    newm->file = NULL;
+    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, file)));
+    arraylist_push(&s->relocs_list, (void*)backref_id(s, m->file, s->link_ids_relocs));
 
     // write out the usings list
     memset(&newm->usings._space, 0, sizeof(newm->usings._space));
@@ -1345,7 +1406,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
 
         if (s->incremental) {
             if (needs_uniquing(v)) {
-                if (jl_is_method_instance(v)) {
+                if (jl_typetagis(v, jl_binding_type)) {
+                    jl_binding_t *b = (jl_binding_t*)v;
+                    if (b->globalref == NULL)
+                        jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
+                    write_pointerfield(s, (jl_value_t*)b->globalref->mod);
+                    write_pointerfield(s, (jl_value_t*)b->globalref->name);
+                    continue;
+                }
+                else if (jl_is_method_instance(v)) {
                     assert(f == s->s);
                     jl_method_instance_t *mi = (jl_method_instance_t*)v;
                     write_pointerfield(s, mi->def.value);
@@ -1368,17 +1437,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             else if (needs_recaching(v)) {
                 arraylist_push(jl_is_datatype(v) ? &s->fixup_types : &s->fixup_objs, (void*)reloc_offset);
             }
-            else if (jl_typetagis(v, jl_binding_type)) {
-                jl_binding_t *b = (jl_binding_t*)v;
-                if (b->globalref == NULL)
-                    jl_error("Binding cannot be serialized"); // no way (currently) to recover its identity
-                // Assign type Any to any owned bindings that don't have a type.
-                // We don't want these accidentally managing to diverge later in different compilation units.
-                if (jl_atomic_load_relaxed(&b->owner) == b) {
-                    jl_value_t *old_ty = NULL;
-                    jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
-                }
-            }
         }
 
         // write data
@@ -1564,6 +1622,26 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             ios_write(s->const_data, (char*)pdata, nb);
             write_pointer(f);
         }
+        else if (jl_is_binding_partition(v)) {
+            jl_binding_partition_t *bpart = (jl_binding_partition_t*)v;
+            jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+            jl_value_t *restriction_val = decode_restriction_value(pku);
+            static_assert(offsetof(jl_binding_partition_t, restriction) == 0, "BindingPartition layout mismatch");
+            write_pointerfield(s, restriction_val);
+#ifndef _P64
+            write_uint(f, decode_restriction_kind(pku));
+#endif
+            write_uint(f, bpart->min_world);
+            write_uint(f, jl_atomic_load_relaxed(&bpart->max_world));
+            write_pointerfield(s, (jl_value_t*)jl_atomic_load_relaxed(&bpart->next));
+#ifdef _P64
+            write_uint(f, decode_restriction_kind(pku)); // This will be moved back into place during deserialization (if necessary)
+            static_assert(sizeof(jl_binding_partition_t) == 5*sizeof(void*), "BindingPartition layout mismatch");
+#else
+            write_uint(f, 0);
+            static_assert(sizeof(jl_binding_partition_t) == 6*sizeof(void*), "BindingPartition layout mismatch");
+#endif
+        }
         else {
             // Generic object::DataType serialization by field
             const char *data = (const char*)v;
@@ -1590,8 +1668,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             for (i = 0; i < np; i++) {
                 size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
                 int mutabl = t->name->mutabl;
-                if (jl_is_binding(v) && ((jl_binding_t*)v)->constp && i == 0) // value field depends on constp field
-                    mutabl = 0;
                 jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset], mutabl);
                 size_t fld_pos = offset + reloc_offset;
                 if (fld != NULL) {
@@ -1649,7 +1725,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
             else if (jl_is_method_instance(v)) {
                 assert(f == s->s);
                 jl_method_instance_t *newmi = (jl_method_instance_t*)&f->buf[reloc_offset];
-                jl_atomic_store_relaxed(&newmi->precompiled, 0);
+                jl_atomic_store_relaxed(&newmi->flags, 0);
             }
             else if (jl_is_code_instance(v)) {
                 assert(f == s->s);
@@ -1767,7 +1843,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
                     }
                 }
                 void *superidx = ptrhash_get(&serialization_order, dt->super);
-                if (s->incremental && superidx != HT_NOTFOUND && (char*)superidx - 1 - (char*)HT_NOTFOUND > item && needs_uniquing((jl_value_t*)dt->super))
+                if (s->incremental && superidx != HT_NOTFOUND && from_seroder_entry(superidx) > item && needs_uniquing((jl_value_t*)dt->super))
                     arraylist_push(&s->uniquing_super, dt->super);
             }
             else if (jl_is_typename(v)) {
@@ -1961,7 +2037,7 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
         assert(s->buildid_depmods_idxs && depsidx < jl_array_len(s->buildid_depmods_idxs));
         size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx];
         assert(2*i < jl_linkage_blobs.len);
-        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
+        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT;
     }
     case ExternalLinkage: {
         assert(link_ids);
@@ -1972,7 +2048,7 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
         assert(depsidx < jl_array_len(s->buildid_depmods_idxs));
         size_t i = jl_array_data(s->buildid_depmods_idxs, uint32_t)[depsidx];
         assert(2*i < jl_linkage_blobs.len);
-        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*sizeof(void*);
+        return (uintptr_t)jl_linkage_blobs.items[2*i] + offset*SYS_EXTERNAL_LINK_UNIT;
     }
     }
     abort();
@@ -2356,13 +2432,13 @@ static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED
 
     void *idx = ptrhash_get(&serialization_order, cache);
     assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
-    assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == cache);
+    assert(serialization_queue.items[from_seroder_entry(idx)] == cache);
     cache = cache_rehash_set(cache, sz);
     // redirect all references to the old cache to relocate to the new cache object
     PTRHASH_PIN((void*)cache)
     PTRHASH_PIN((void*)idx)
     ptrhash_put(&serialization_order, cache, idx);
-    serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = cache;
+    serialization_queue.items[from_seroder_entry(idx)] = cache;
     return cache;
 }
 
@@ -2380,6 +2456,53 @@ static void jl_prune_type_cache_linear(jl_svec_t *cache)
         jl_svecset(cache, ins++, jl_nothing);
 }
 
+uint_t bindingkey_hash(size_t idx, jl_value_t *data);
+
+static void jl_prune_module_bindings(jl_module_t * m) JL_GC_DISABLED
+{
+    jl_svec_t * bindings = jl_atomic_load_relaxed(&m->bindings);
+    size_t l = jl_svec_len(bindings), i;
+    arraylist_t bindings_list;
+    arraylist_new(&bindings_list, 0);
+    if (l == 0)
+        return;
+    for (i = 0; i < l; i++) {
+        jl_value_t *ti = jl_svecref(bindings, i);
+        if (ti == jl_nothing)
+            continue;
+        jl_binding_t *ref = ((jl_binding_t*)ti);
+        if (!((ptrhash_get(&serialization_order, ref) == HT_NOTFOUND) &&
+            (ptrhash_get(&serialization_order, ref->globalref) == HT_NOTFOUND))) {
+            jl_svecset(bindings, i, jl_nothing);
+            arraylist_push(&bindings_list, ref);
+        }
+    }
+    jl_genericmemory_t* bindingkeyset = jl_atomic_load_relaxed(&m->bindingkeyset);
+    _Atomic(jl_genericmemory_t*)bindingkeyset2;
+    jl_atomic_store_relaxed(&bindingkeyset2,(jl_genericmemory_t*)jl_an_empty_memory_any);
+    jl_svec_t *bindings2 = jl_alloc_svec_uninit(bindings_list.len);
+    for (i = 0; i < bindings_list.len; i++) {
+        jl_binding_t *ref = (jl_binding_t*)bindings_list.items[i];
+        jl_svecset(bindings2, i, ref);
+        jl_smallintset_insert(&bindingkeyset2, (jl_value_t*)m, bindingkey_hash, i, (jl_value_t*)bindings2);
+    }
+    void *idx = ptrhash_get(&serialization_order, bindings);
+    assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
+    assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindings);
+    ptrhash_put(&serialization_order, bindings2, idx);
+    serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = bindings2;
+
+    idx = ptrhash_get(&serialization_order, bindingkeyset);
+    assert(idx != HT_NOTFOUND && idx != (void*)(uintptr_t)-1);
+    assert(serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] == bindingkeyset);
+    ptrhash_put(&serialization_order, jl_atomic_load_relaxed(&bindingkeyset2), idx);
+    serialization_queue.items[(char*)idx - 1 - (char*)HT_NOTFOUND] = jl_atomic_load_relaxed(&bindingkeyset2);
+    jl_atomic_store_relaxed(&m->bindings, bindings2);
+    jl_atomic_store_relaxed(&m->bindingkeyset, jl_atomic_load_relaxed(&bindingkeyset2));
+    jl_gc_wb(m, bindings2);
+    jl_gc_wb(m, jl_atomic_load_relaxed(&bindingkeyset2));
+}
+
 static void strip_slotnames(jl_array_t *slotnames)
 {
     // replace slot names with `?`, except unused_sym since the compiler looks at it
@@ -2446,7 +2569,7 @@ static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
     if (m->source) {
         int stripped_ir = 0;
         if (jl_options.strip_ir) {
-            int should_strip_ir = 0;
+            int should_strip_ir = jl_options.trim;
             if (!should_strip_ir) {
                 if (jl_atomic_load_relaxed(&m->unspecialized)) {
                     jl_code_instance_t *unspec = jl_atomic_load_relaxed(&jl_atomic_load_relaxed(&m->unspecialized)->cache);
@@ -2648,8 +2771,46 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     // strip metadata and IR when requested
     if (jl_options.strip_metadata || jl_options.strip_ir)
         jl_strip_all_codeinfos();
+    // collect needed methods and replace method tables that are in the tags array
+    htable_new(&new_methtables, 0);
+    arraylist_t MIs;
+    arraylist_new(&MIs, 0);
+    arraylist_t gvars;
+    arraylist_new(&gvars, 0);
+    arraylist_t external_fns;
+    arraylist_new(&external_fns, 0);
 
     int en = jl_gc_enable(0);
+    if (native_functions) {
+        jl_get_llvm_gvs(native_functions, &gvars);
+        jl_get_llvm_external_fns(native_functions, &external_fns);
+        if (jl_options.trim)
+            jl_get_llvm_mis(native_functions, &MIs);
+    }
+    if (jl_options.trim) {
+        jl_rebuild_methtables(&MIs, &new_methtables);
+        jl_methtable_t *mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_type_type_mt);
+        JL_GC_PROMISE_ROOTED(mt);
+        if (mt != HT_NOTFOUND)
+            jl_type_type_mt = mt;
+        else
+            jl_type_type_mt = jl_new_method_table(jl_type_type_mt->name, jl_type_type_mt->module);
+
+        mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_kwcall_mt);
+        JL_GC_PROMISE_ROOTED(mt);
+        if (mt != HT_NOTFOUND)
+            jl_kwcall_mt = mt;
+        else
+            jl_kwcall_mt = jl_new_method_table(jl_kwcall_mt->name, jl_kwcall_mt->module);
+
+        mt = (jl_methtable_t *)ptrhash_get(&new_methtables, jl_nonfunction_mt);
+        JL_GC_PROMISE_ROOTED(mt);
+        if (mt != HT_NOTFOUND)
+            jl_nonfunction_mt = mt;
+        else
+            jl_nonfunction_mt = jl_new_method_table(jl_nonfunction_mt->name, jl_nonfunction_mt->module);
+    }
+
     nsym_tag = 0;
     htable_new(&symbol_table, 0);
     htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs));
@@ -2696,14 +2857,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     htable_new(&s.callers_with_edges, 0);
     jl_value_t **const*const tags = get_tags(); // worklist == NULL ? get_tags() : NULL;
 
-    arraylist_t gvars;
-    arraylist_t external_fns;
-    arraylist_new(&gvars, 0);
-    arraylist_new(&external_fns, 0);
-    if (native_functions) {
-        jl_get_llvm_gvs(native_functions, &gvars);
-        jl_get_llvm_external_fns(native_functions, &external_fns);
-    }
 
     if (worklist == NULL) {
         // empty!(Core.ARGS)
@@ -2762,6 +2915,8 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
         // step 1.2: ensure all gvars are part of the sysimage too
         record_gvars(&s, &gvars);
         record_external_fns(&s, &external_fns);
+        if (jl_options.trim)
+            record_gvars(&s, &MIs);
         jl_serialize_reachable(&s);
         // step 1.3: prune (garbage collect) special weak references from the jl_global_roots_list
         if (worklist == NULL) {
@@ -2782,8 +2937,30 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
         // step 1.4: prune (garbage collect) some special weak references from
         // built-in type caches too
         for (i = 0; i < serialization_queue.len; i++) {
-            jl_typename_t *tn = (jl_typename_t*)serialization_queue.items[i];
-            if (jl_is_typename(tn)) {
+            jl_value_t *v = (jl_value_t*)serialization_queue.items[i];
+            if (jl_options.trim) {
+                if (jl_is_method(v)){
+                    jl_method_t *m = (jl_method_t*)v;
+                    jl_value_t *specializations_ = jl_atomic_load_relaxed(&m->specializations);
+                    if (!jl_is_svec(specializations_))
+                        continue;
+
+                    jl_svec_t *specializations = (jl_svec_t *)specializations_;
+                    size_t l = jl_svec_len(specializations), i;
+                    for (i = 0; i < l; i++) {
+                        jl_value_t *mi = jl_svecref(specializations, i);
+                        if (mi == jl_nothing)
+                            continue;
+                        if (ptrhash_get(&serialization_order, mi) == HT_NOTFOUND)
+                            jl_svecset(specializations, i, jl_nothing);
+                    }
+                } else if (jl_is_module(v)) {
+                    jl_prune_module_bindings((jl_module_t*)v);
+                }
+            }
+            // Not else
+            if (jl_is_typename(v)) {
+                jl_typename_t *tn = (jl_typename_t*)v;
                 jl_atomic_store_relaxed(&tn->cache,
                     jl_prune_type_cache_hash(jl_atomic_load_relaxed(&tn->cache)));
                 jl_gc_wb(tn, jl_atomic_load_relaxed(&tn->cache));
@@ -2892,7 +3069,9 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
             jl_write_value(&s, global_roots_keyset);
             jl_write_value(&s, s.ptls->root_task->tls);
             write_uint32(f, jl_get_gs_ctr());
-            write_uint(f, jl_atomic_load_acquire(&jl_world_counter));
+            size_t world = jl_atomic_load_acquire(&jl_world_counter);
+            // assert(world == precompilation_world); // This triggers on a normal build of julia
+            write_uint(f, world);
             write_uint(f, jl_typeinf_world);
         }
         else {
@@ -2945,6 +3124,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
     htable_free(&nullptrs);
     htable_free(&symbol_table);
     htable_free(&fptr_to_id);
+    htable_free(&new_methtables);
     nsym_tag = 0;
 
     jl_gc_enable(en);
@@ -2974,6 +3154,10 @@ static void jl_write_header_for_incremental(ios_t *f, jl_array_t *worklist, jl_a
 JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *worklist, bool_t emit_split,
                                          ios_t **s, ios_t **z, jl_array_t **udeps, int64_t *srctextpos)
 {
+    if (jl_options.strip_ir || jl_options.trim) {
+        // make sure this is precompiled for jl_foreach_reachable_mtable
+        jl_get_loaded_modules();
+    }
     jl_gc_collect(JL_GC_FULL);
     jl_gc_collect(JL_GC_INCREMENTAL);   // sweep finalizers
     JL_TIMING(SYSIMG_DUMP, SYSIMG_DUMP);
@@ -3023,7 +3207,11 @@ JL_DLLEXPORT void jl_create_system_image(void **_native_data, jl_array_t *workli
         }
     }
     else if (_native_data != NULL) {
-        *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL);
+        precompilation_world = jl_atomic_load_acquire(&jl_world_counter);
+        if (jl_options.trim)
+            *_native_data = jl_precompile_trimmed(precompilation_world);
+        else
+            *_native_data = jl_precompile(jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL);
     }
 
     // Make sure we don't run any Julia code concurrently after this point
@@ -3571,6 +3759,19 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
                 memcpy(newitems, mod->usings.items, mod->usings.len * sizeof(void*));
                 mod->usings.items = newitems;
             }
+            // Move the binding bits back to their correct place
+#ifdef _P64
+            jl_svec_t *table = jl_atomic_load_relaxed(&mod->bindings);
+            for (size_t i = 0; i < jl_svec_len(table); i++) {
+                jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
+                if ((jl_value_t*)b == jl_nothing)
+                    continue;
+                jl_binding_partition_t *bpart = jl_atomic_load_relaxed(&b->partitions);
+                jl_atomic_store_relaxed(&bpart->restriction,
+                    encode_restriction((jl_value_t*)jl_atomic_load_relaxed(&bpart->restriction), bpart->reserved));
+                bpart->reserved = 0;
+            }
+#endif
         }
         else {
             abort();
diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c
index 2420890d08a94..6ecc8b6290c4e 100644
--- a/src/staticdata_utils.c
+++ b/src/staticdata_utils.c
@@ -159,7 +159,8 @@ static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited,
     if (jl_is_method(mod))
         mod = ((jl_method_t*)mod)->module;
     assert(jl_is_module(mod));
-    if (jl_atomic_load_relaxed(&mi->precompiled) || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) {
+    uint8_t is_precompiled = jl_atomic_load_relaxed(&mi->flags) & JL_MI_FLAGS_MASK_PRECOMPILED;
+    if (is_precompiled || !jl_object_in_image((jl_value_t*)mod) || type_in_worklist(mi->specTypes)) {
         return 1;
     }
     if (!mi->backedges) {
@@ -758,6 +759,16 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
     static jl_value_t *replace_depot_func = NULL;
     if (!replace_depot_func)
         replace_depot_func = jl_get_global(jl_base_module, jl_symbol("replace_depot_path"));
+    static jl_value_t *normalize_depots_func = NULL;
+    if (!normalize_depots_func)
+        normalize_depots_func = jl_get_global(jl_base_module, jl_symbol("normalize_depots_for_relocation"));
+
+    jl_value_t *depots = NULL, *prefs_hash = NULL, *prefs_list = NULL;
+    JL_GC_PUSH2(&depots, &prefs_list);
+    last_age = ct->world_age;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+    depots = jl_apply(&normalize_depots_func, 1);
+    ct->world_age = last_age;
 
     // write a placeholder for total size so that we can quickly seek past all of the
     // dependencies if we don't need them
@@ -770,13 +781,14 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
 
         if (replace_depot_func) {
             jl_value_t **replace_depot_args;
-            JL_GC_PUSHARGS(replace_depot_args, 2);
+            JL_GC_PUSHARGS(replace_depot_args, 3);
             replace_depot_args[0] = replace_depot_func;
             replace_depot_args[1] = deppath;
+            replace_depot_args[2] = depots;
             ct = jl_current_task;
             size_t last_age = ct->world_age;
             ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
-            deppath = (jl_value_t*)jl_apply(replace_depot_args, 2);
+            deppath = (jl_value_t*)jl_apply(replace_depot_args, 3);
             ct->world_age = last_age;
             JL_GC_POP();
         }
@@ -809,9 +821,6 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
     write_int32(s, 0); // terminator, for ease of reading
 
     // Calculate Preferences hash for current package.
-    jl_value_t *prefs_hash = NULL;
-    jl_value_t *prefs_list = NULL;
-    JL_GC_PUSH1(&prefs_list);
     if (jl_base_module) {
         // Toplevel module is the module we're currently compiling, use it to get our preferences hash
         jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__"));
@@ -858,7 +867,7 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t* worklist, jl_array_t
         write_int32(s, 0);
         write_uint64(s, 0);
     }
-    JL_GC_POP(); // for prefs_list
+    JL_GC_POP(); // for depots, prefs_list
 
     // write a dummy file position to indicate the beginning of the source-text
     pos = ios_pos(s);
diff --git a/src/subtype.c b/src/subtype.c
index 3340abfc4bafc..65ee4d5916bce 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -65,7 +65,6 @@ typedef struct jl_varbinding_t {
     jl_value_t *lb;
     jl_value_t *ub;
     int8_t right;       // whether this variable came from the right side of `A <: B`
-    int8_t occurs;      // occurs in any position
     int8_t occurs_inv;  // occurs in invariant position
     int8_t occurs_cov;  // # of occurrences in covariant position
     int8_t concrete;    // 1 if another variable has a constraint forcing this one to be concrete
@@ -179,7 +178,7 @@ static int current_env_length(jl_stenv_t *e)
 typedef struct {
     int8_t *buf;
     int rdepth;
-    int8_t _space[32]; // == 8 * 4
+    int8_t _space[24]; // == 8 * 3
     jl_gcframe_t gcframe;
     jl_value_t *roots[24]; // == 8 * 3
 } jl_savedenv_t;
@@ -208,7 +207,6 @@ static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
             roots[i++] = v->ub;
             roots[i++] = (jl_value_t*)v->innervars;
         }
-        se->buf[j++] = v->occurs;
         se->buf[j++] = v->occurs_inv;
         se->buf[j++] = v->occurs_cov;
         se->buf[j++] = v->max_offset;
@@ -243,7 +241,7 @@ static void alloc_env(jl_stenv_t *e, jl_savedenv_t *se, int root)
             ct->gcstack = &se->gcframe;
         }
     }
-    se->buf = (len > 8 ? (int8_t*)malloc_s(len * 4) : se->_space);
+    se->buf = (len > 8 ? (int8_t*)malloc_s(len * 3) : se->_space);
 #ifdef __clang_gcanalyzer__
     memset(se->buf, 0, len * 3);
 #endif
@@ -290,7 +288,6 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO
             v->ub = roots[i++];
             v->innervars = (jl_array_t*)roots[i++];
         }
-        v->occurs = se->buf[j++];
         v->occurs_inv = se->buf[j++];
         v->occurs_cov = se->buf[j++];
         v->max_offset = se->buf[j++];
@@ -302,15 +299,6 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO
         memset(&e->envout[e->envidx], 0, (e->envsz - e->envidx)*sizeof(void*));
 }
 
-static void clean_occurs(jl_stenv_t *e)
-{
-    jl_varbinding_t *v = e->vars;
-    while (v) {
-        v->occurs = 0;
-        v = v->prev;
-    }
-}
-
 #define flip_offset(e) ((e)->Loffset *= -1)
 
 // type utilities
@@ -599,6 +587,8 @@ static jl_value_t *simple_meet(jl_value_t *a, jl_value_t *b, int overesi)
 
 static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param);
 
+#define has_next_union_state(e, R) ((((R) ? &(e)->Runions : &(e)->Lunions)->more) != 0)
+
 static int next_union_state(jl_stenv_t *e, int8_t R) JL_NOTSAFEPOINT
 {
     jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
@@ -679,8 +669,6 @@ static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int par
 // of determining whether the variable is concrete.
 static void record_var_occurrence(jl_varbinding_t *vb, jl_stenv_t *e, int param) JL_NOTSAFEPOINT
 {
-    if (vb != NULL)
-        vb->occurs = 1;
     if (vb != NULL && param) {
         // saturate counters at 2; we don't need values bigger than that
         if (param == 2 && e->invdepth > vb->depth0) {
@@ -915,7 +903,7 @@ static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
     u = unalias_unionall(u, e);
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0,
                            e->invdepth, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
@@ -1316,6 +1304,7 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in
 }
 
 static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e);
+static int has_exists_typevar(jl_value_t *x, jl_stenv_t *e) JL_NOTSAFEPOINT;
 
 // `param` means we are currently looking at a parameter of a type constructor
 // (as opposed to being outside any type constructor, or comparing variable bounds).
@@ -1324,7 +1313,31 @@ static int try_subtype_by_bounds(jl_value_t *a, jl_value_t *b, jl_stenv_t *e);
 static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
 {
     if (jl_is_uniontype(x)) {
-        if (x == y) return 1;
+        if (obviously_egal(x, y))
+            return 1;
+        if (e->Runions.depth == 0 && jl_is_typevar(y) && !jl_has_free_typevars(x)) {
+            // Similar to fast path for repeated elements: if there have been no outer
+            // unions on the right, and the right side is a typevar, then we can handle the
+            // typevar first before picking a union element, under the theory that it may
+            // be easy to match or reject this whole union in comparing and setting the lb
+            // and ub of the variable binding, without needing to examine each element.
+            // However, if x contains any free typevars, then each element with a free
+            // typevar must be handled separately from the union of all elements without
+            // free typevars, since the typevars presence might lead to those elements
+            // getting eliminated (omit_bad_union) or degenerate (Union{Ptr{T}, Ptr}) or
+            // combined (Union{T, S} where {T, S <: T}).
+            jl_tvar_t *yvar = (jl_tvar_t *)y;
+            jl_varbinding_t *yb = lookup(e, yvar);
+            while (e->intersection && yb != NULL && yb->lb == yb->ub && jl_is_typevar(yb->lb)) {
+                yvar = (jl_tvar_t *)yb->lb;
+                yb = lookup(e, yvar);
+            }
+            // Note: `x <: ∃y` performs a local ∀-∃ check between `x` and `yb->ub`.
+            // We need to ensure that there's no ∃ typevar as otherwise that check
+            // might cause false alarm due to the accumulated env change.
+            if (yb == NULL || yb->right == 0 || !has_exists_typevar(yb->ub, e))
+                return subtype_var(yvar, x, e, 1, param);
+        }
         x = pick_union_element(x, e, 0);
     }
     if (jl_is_uniontype(y)) {
@@ -2410,24 +2423,47 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e,
     if (obviously_egal(x, y))
         return x;
 
+    jl_varbinding_t *vars = NULL;
+    jl_varbinding_t *bbprev = NULL;
+    jl_varbinding_t *xb = jl_is_typevar(x) ? lookup(e, (jl_tvar_t *)x) : NULL;
+    jl_varbinding_t *yb = jl_is_typevar(y) ? lookup(e, (jl_tvar_t *)y) : NULL;
+    int simple_x = !jl_has_free_typevars(!jl_is_typevar(x) ? x : xb ? xb->ub : ((jl_tvar_t *)x)->ub);
+    int simple_y = !jl_has_free_typevars(!jl_is_typevar(y) ? y : yb ? yb->ub : ((jl_tvar_t *)y)->ub);
+    if (simple_x && simple_y && !(xb && yb)) {
+        vars = e->vars;
+        e->vars = xb ? xb : yb;
+        if (e->vars != NULL) {
+            bbprev = e->vars->prev;
+            e->vars->prev = NULL;
+        }
+    }
     jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int savedepth = e->invdepth;
     e->invdepth = depth;
     jl_value_t *res = intersect_all(x, y, e);
     e->invdepth = savedepth;
     pop_unionstate(&e->Runions, &oldRunions);
+    if (bbprev) e->vars->prev = bbprev;
+    if (vars) e->vars = vars;
     return res;
 }
 
 static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t *e, int8_t R, int param)
 {
-    if (param == 2 || (!jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u))) {
+    int no_free = !jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u);
+    if (param == 2 || no_free) {
         jl_value_t *a=NULL, *b=NULL;
         JL_GC_PUSH2(&a, &b);
+        jl_varbinding_t *vars = NULL;
+        if (no_free) {
+            vars = e->vars;
+            e->vars = NULL;
+        }
         jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
         a = R ? intersect_all(x, u->a, e) : intersect_all(u->a, x, e);
         b = R ? intersect_all(x, u->b, e) : intersect_all(u->b, x, e);
         pop_unionstate(&e->Runions, &oldRunions);
+        if (vars) e->vars = vars;
         jl_value_t *i = simple_join(a,b);
         JL_GC_POP();
         return i;
@@ -3312,7 +3348,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
 {
     jl_value_t *res = NULL;
     jl_savedenv_t se;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0, 0, 0,
                            e->invdepth, NULL, e->vars };
     JL_GC_PUSH4(&res, &vb.lb, &vb.ub, &vb.innervars);
     save_env(e, &se, 1);
@@ -3341,7 +3377,7 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
                 vb.ub = vb.var->ub;
             }
             restore_env(e, &se, vb.constraintkind == 1 ? 1 : 0);
-            vb.occurs = vb.occurs_cov = vb.occurs_inv = 0;
+            vb.occurs_cov = vb.occurs_inv = 0;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
@@ -4042,79 +4078,12 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
     return jl_bottom_type;
 }
 
-static int merge_env(jl_stenv_t *e, jl_savedenv_t *se, int count)
+static int merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se, int count)
 {
-    if (count == 0)
-        alloc_env(e, se, 1);
-    jl_value_t **roots = NULL;
-    int nroots = 0;
-    if (se->gcframe.nroots == JL_GC_ENCODE_PUSHARGS(1)) {
-        jl_svec_t *sv = (jl_svec_t*)se->roots[0];
-        assert(jl_is_svec(sv));
-        roots = jl_svec_data(sv);
-        nroots = jl_svec_len(sv);
-    }
-    else {
-        roots = se->roots;
-        nroots = se->gcframe.nroots >> 2;
-    }
-    int m = 0, n = 0;
-    jl_varbinding_t *v = e->vars;
-    while (v != NULL) {
-        if (count == 0) {
-            // need to initialize this
-            se->buf[m] = 0;
-            se->buf[m+1] = 0;
-            se->buf[m+2] = 0;
-            se->buf[m+3] = v->max_offset;
-        }
-        jl_value_t *b1, *b2;
-        if (v->occurs) {
-            // only merge lb/ub if this var occurs.
-            b1 = roots[n];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = v->lb;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            roots[n] = b1 ? simple_meet(b1, b2, 0) : b2;
-            b1 = roots[n+1];
-            JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-            b2 = v->ub;
-            JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-            roots[n+1] = b1 ? simple_join(b1, b2) : b2;
-            // record the meeted vars.
-            se->buf[m] = 1;
-        }
-        // `innervars` might be re-sorted inside `finish_unionall`.
-        // We'd better always merge it.
-        b1 = roots[n+2];
-        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-        b2 = (jl_value_t*)v->innervars;
-        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
-        if (b2 && b1 != b2) {
-            if (b1)
-                jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
-            else
-                roots[n+2] = b2;
-        }
-        // always merge occurs_inv/cov by max (never decrease)
-        if (v->occurs_inv > se->buf[m+1])
-            se->buf[m+1] = v->occurs_inv;
-        if (v->occurs_cov > se->buf[m+2])
-            se->buf[m+2] = v->occurs_cov;
-        // always merge max_offset by min
-        if (!v->intersected && v->max_offset < se->buf[m+3])
-            se->buf[m+3] = v->max_offset;
-        m = m + 4;
-        n = n + 3;
-        v = v->prev;
+    if (count == 0) {
+        save_env(e, me, 1);
+        return 1;
     }
-    assert(n == nroots); (void)nroots;
-    return count + 1;
-}
-
-// merge untouched vars' info.
-static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se)
-{
     jl_value_t **merged = NULL;
     jl_value_t **saved = NULL;
     int nroots = 0;
@@ -4136,47 +4105,49 @@ static void final_merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se)
     }
     assert(nroots == current_env_length(e) * 3);
     assert(nroots % 3 == 0);
-    for (int n = 0, m = 0; n < nroots; n += 3, m += 4) {
-        if (merged[n] == NULL)
-            merged[n] = saved[n];
-        if (merged[n+1] == NULL)
-            merged[n+1] = saved[n+1];
-        jl_value_t *b1, *b2;
+    int m = 0, n = 0;
+    jl_varbinding_t *v = e->vars;
+    while (v != NULL) {
+        jl_value_t *b0, *b1, *b2;
+        // merge `lb`
+        b0 = saved[n];
+        b1 = merged[n];
+        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
+        b2 = v->lb;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+        merged[n] = (b1 == b0 || b2 == b0) ? b0 : simple_meet(b1, b2, 0);
+        // merge `ub`
+        b0 = saved[n+1];
+        b1 = merged[n+1];
+        JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
+        b2 = v->ub;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
+        merged[n+1] = (b1 == b0 || b2 == b0) ? b0 : simple_join(b1, b2);
+        // merge `innervars`
         b1 = merged[n+2];
         JL_GC_PROMISE_ROOTED(b1); // clang-sagc doesn't know this came from our GC frame
-        b2 = saved[n+2];
-        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know this came from our GC frame
+        b2 = (jl_value_t*)v->innervars;
+        JL_GC_PROMISE_ROOTED(b2); // clang-sagc doesn't know the fields of this are stack GC roots
         if (b2 && b1 != b2) {
             if (b1)
                 jl_array_ptr_1d_append((jl_array_t*)b1, (jl_array_t*)b2);
             else
                 merged[n+2] = b2;
         }
-        me->buf[m] |= se->buf[m];
-    }
-}
-
-static void expand_local_env(jl_stenv_t *e, jl_value_t *res)
-{
-    jl_varbinding_t *v = e->vars;
-    // Here we pull in some typevar missed in fastpath.
-    while (v != NULL) {
-        v->occurs = v->occurs || jl_has_typevar(res, v->var);
-        assert(v->occurs == 0 || v->occurs == 1);
-        v = v->prev;
-    }
-    v = e->vars;
-    while (v != NULL) {
-        if (v->occurs == 1) {
-            jl_varbinding_t *v2 = e->vars;
-            while (v2 != NULL) {
-                if (v2 != v && v2->occurs == 0)
-                    v2->occurs = -(jl_has_typevar(v->lb, v2->var) || jl_has_typevar(v->ub, v2->var));
-                v2 = v2->prev;
-            }
-        }
+        // merge occurs_inv/cov by max (never decrease)
+        if (v->occurs_inv > me->buf[m])
+            me->buf[m] = v->occurs_inv;
+        if (v->occurs_cov > me->buf[m+1])
+            me->buf[m+1] = v->occurs_cov;
+        // merge max_offset by min
+        if (!v->intersected && v->max_offset < me->buf[m+2])
+            me->buf[m+2] = v->max_offset;
+        m = m + 3;
+        n = n + 3;
         v = v->prev;
     }
+    assert(n == nroots); (void)nroots;
+    return count + 1;
 }
 
 static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
@@ -4189,26 +4160,31 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
     jl_savedenv_t se, me;
     save_env(e, &se, 1);
     int niter = 0, total_iter = 0;
-    clean_occurs(e);
     is[0] = intersect(x, y, e, 0); // root
-    if (is[0] != jl_bottom_type) {
-        expand_local_env(e, is[0]);
-        niter = merge_env(e, &me, niter);
+    if (is[0] == jl_bottom_type) {
+        restore_env(e, &se, 1);
+    }
+    else if (!e->emptiness_only && has_next_union_state(e, 1)) {
+        niter = merge_env(e, &me, &se, niter);
+        restore_env(e, &se, 1);
     }
-    restore_env(e, &se, 1);
     while (next_union_state(e, 1)) {
         if (e->emptiness_only && is[0] != jl_bottom_type)
             break;
         e->Runions.depth = 0;
         e->Runions.more = 0;
 
-        clean_occurs(e);
         is[1] = intersect(x, y, e, 0);
-        if (is[1] != jl_bottom_type) {
-            expand_local_env(e, is[1]);
-            niter = merge_env(e, &me, niter);
+        if (is[1] == jl_bottom_type) {
+            restore_env(e, &se, 1);
+        }
+        else if (niter > 0 || (!e->emptiness_only && has_next_union_state(e, 1))) {
+            niter = merge_env(e, &me, &se, niter);
+            restore_env(e, &se, 1);
+        }
+        else {
+            assert(is[0] == jl_bottom_type);
         }
-        restore_env(e, &se, 1);
         if (is[0] == jl_bottom_type)
             is[0] = is[1];
         else if (is[1] != jl_bottom_type) {
@@ -4216,13 +4192,18 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
             is[0] = jl_type_union(is, 2);
         }
         total_iter++;
-        if (niter > 4 || total_iter > 400000) {
+        if (has_next_union_state(e, 1) && (niter > 4 || total_iter > 400000)) {
             is[0] = y;
+            // we give up precise intersection here, just restore the saved env
+            restore_env(e, &se, 1);
+            if (niter > 0) {
+                free_env(&me);
+                niter = 0;
+            }
             break;
         }
     }
     if (niter) {
-        final_merge_env(e, &me, &se);
         restore_env(e, &me, 1);
         free_env(&me);
     }
@@ -4707,7 +4688,7 @@ static jl_value_t *_widen_diagonal(jl_value_t *t, jl_varbinding_t *troot) {
 
 static jl_value_t *widen_diagonal(jl_value_t *t, jl_unionall_t *u, jl_varbinding_t *troot)
 {
-    jl_varbinding_t vb = { u->var, NULL, NULL, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, troot };
+    jl_varbinding_t vb = { u->var, NULL, NULL, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, troot };
     jl_value_t *nt;
     JL_GC_PUSH2(&vb.innervars, &nt);
     if (jl_is_unionall(u->body))
@@ -4730,6 +4711,56 @@ JL_DLLEXPORT jl_value_t *jl_widen_diagonal(jl_value_t *t, jl_unionall_t *ua)
 }
 
 // specificity comparison
+static int count_missing_wrap(jl_value_t *x, jl_typeenv_t *env)
+{
+    if (!jl_has_free_typevars(x))
+        return 0;
+    jl_typeenv_t *wrapped = NULL;
+    int count = 0;
+    for (jl_typeenv_t *env2 = env; env2 != NULL; env2 = env2->prev) {
+        int need_wrap = 0;
+        for (jl_typeenv_t *env3 = wrapped; env3 != NULL && need_wrap == 0; env3 = env3->prev) {
+            if (env3->var == env2->var)
+                need_wrap = -1;
+            else if (jl_has_typevar(env3->var->lb, env2->var) || jl_has_typevar(env3->var->ub, env2->var))
+                need_wrap = 1;
+        }
+        need_wrap = need_wrap == 0 ? jl_has_typevar(x, env2->var) :
+                    need_wrap == -1 ? 0 : 1;
+        if (need_wrap) {
+            count++;
+            jl_typeenv_t *newenv = (jl_typeenv_t*)alloca(sizeof(jl_typeenv_t));
+            newenv->var = env2->var;
+            newenv->val = NULL;
+            newenv->prev = wrapped;
+            wrapped = newenv;
+        }
+    }
+    return count;
+}
+
+static int obvious_subtype_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *subtype, int wrapx, int wrapy)
+{
+    if (wrapx != 0 || wrapy != 0) {
+        int wrap_count = wrapx - wrapy;
+        while (wrap_count > 0 && jl_is_unionall(y))
+        {
+            y = ((jl_unionall_t*)y)->body;
+            wrap_count--;
+        }
+        while (wrap_count < 0 && jl_is_unionall(x))
+        {
+            x = ((jl_unionall_t*)x)->body;
+            wrap_count++;
+        }
+        if (wrap_count > 0) {
+            if (obvious_subtype(jl_unwrap_unionall(x), y, y0, subtype) && !*subtype)
+                return 1;
+            return 0;
+        }
+    }
+    return obvious_subtype(x, y, y0, subtype);
+}
 
 static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0, jl_typeenv_t *env)
 {
@@ -4752,12 +4783,14 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0,
         a = b;
         b = temp;
     }
+    int wrapa = count_missing_wrap(a, env);
+    int wrapb = count_missing_wrap(b, env);
     // first check if a <: b has an obvious answer
     int subtype_ab = 2;
     if (b == (jl_value_t*)jl_any_type || a == jl_bottom_type) {
         subtype_ab = 1;
     }
-    else if (obvious_subtype(a, b, b0, &subtype_ab)) {
+    else if (obvious_subtype_msp(a, b, b0, &subtype_ab, wrapa, wrapb)) {
 #ifdef NDEBUG
         if (subtype_ab == 0)
             return 0;
@@ -4771,7 +4804,7 @@ static int eq_msp(jl_value_t *a, jl_value_t *b, jl_value_t *a0, jl_value_t *b0,
     if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type) {
         subtype_ba = 1;
     }
-    else if (obvious_subtype(b, a, a0, &subtype_ba)) {
+    else if (obvious_subtype_msp(b, a, a0, &subtype_ba, wrapb, wrapa)) {
 #ifdef NDEBUG
         if (subtype_ba == 0)
             return 0;
@@ -4836,7 +4869,9 @@ static int sub_msp(jl_value_t *x, jl_value_t *y, jl_value_t *y0, jl_typeenv_t *e
         return 1;
     }
     int obvious_sub = 2;
-    if (obvious_subtype(x, y, y0, &obvious_sub)) {
+    int wrapx = count_missing_wrap(x, env);
+    int wrapy = count_missing_wrap(y, env);
+    if (obvious_subtype_msp(x, y, y0, &obvious_sub, wrapx, wrapy)) {
 #ifdef NDEBUG
         return obvious_sub;
 #endif
diff --git a/src/support/arraylist.h b/src/support/arraylist.h
index 6ad2f0e2f28c9..a83bd2808756c 100644
--- a/src/support/arraylist.h
+++ b/src/support/arraylist.h
@@ -20,11 +20,11 @@ typedef struct {
     void *_space[AL_N_INLINE];
 } arraylist_t;
 
-arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT;
-void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT arraylist_t *arraylist_new(arraylist_t *a, size_t size) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void arraylist_free(arraylist_t *a) JL_NOTSAFEPOINT;
 
-void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
-void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT;
 
 typedef struct {
@@ -34,11 +34,12 @@ typedef struct {
     void *_space[SMALL_AL_N_INLINE];
 } small_arraylist_t;
 
-small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT;
-void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
 
-void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
-void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
+JL_DLLEXPORT small_arraylist_t *small_arraylist_new(small_arraylist_t *a, uint32_t size) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
diff --git a/src/support/dtypes.h b/src/support/dtypes.h
index 57f4fa99f0016..6513370da4dae 100644
--- a/src/support/dtypes.h
+++ b/src/support/dtypes.h
@@ -123,6 +123,13 @@ typedef intptr_t ssize_t;
 #define STATIC_INLINE static inline
 #define FORCE_INLINE static inline __attribute__((always_inline))
 
+#ifdef _OS_WINDOWS_
+#define EXTERN_INLINE_DECLARE inline
+#else
+#define EXTERN_INLINE_DECLARE inline __attribute__ ((visibility("default")))
+#endif
+#define EXTERN_INLINE_DEFINE extern inline JL_DLLEXPORT
+
 #if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
 #  define NOINLINE __declspec(noinline)
 #  define NOINLINE_DECL(f) __declspec(noinline) f
diff --git a/src/symbol.c b/src/symbol.c
index 4b6c95730b0dd..ef2c11e0842e8 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -15,6 +15,7 @@
 extern "C" {
 #endif
 
+uv_mutex_t symtab_lock;
 static _Atomic(jl_sym_t*) symtab = NULL;
 
 #define MAX_SYM_LEN ((size_t)INTPTR_MAX - sizeof(jl_taggedvalue_t) - sizeof(jl_sym_t) - 1)
@@ -35,14 +36,10 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
 {
     jl_sym_t *sym;
     size_t nb = symbol_nbytes(len);
-    jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc_nolock(nb, 0, sizeof(void*), 0);
+    jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(nb, 0, sizeof(void*), 0);
     sym = (jl_sym_t*)jl_valueof(tag);
     // set to old marked so that we won't look at it in the GC or write barrier.
     jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED);
-#ifdef MMTK_GC
-    jl_ptls_t ptls = jl_current_task->ptls;
-    mmtk_immortal_post_alloc_fast(&ptls->mmtk_mutator, jl_valueof(tag), nb);
-#endif
     jl_atomic_store_relaxed(&sym->left, NULL);
     jl_atomic_store_relaxed(&sym->right, NULL);
     sym->hash = hash_symbol(str, len);
@@ -90,15 +87,15 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT // (or throw)
     _Atomic(jl_sym_t*) *slot;
     jl_sym_t *node = symtab_lookup(&symtab, str, len, &slot);
     if (node == NULL) {
-        uv_mutex_lock(&gc_perm_lock);
+        uv_mutex_lock(&symtab_lock);
         // Someone might have updated it, check and look up again
         if (jl_atomic_load_relaxed(slot) != NULL && (node = symtab_lookup(slot, str, len, &slot))) {
-            uv_mutex_unlock(&gc_perm_lock);
+            uv_mutex_unlock(&symtab_lock);
             return node;
         }
         node = mk_symbol(str, len);
         jl_atomic_store_release(slot, node);
-        uv_mutex_unlock(&gc_perm_lock);
+        uv_mutex_unlock(&symtab_lock);
     }
     return node;
 }
diff --git a/src/sys.c b/src/sys.c
index 107a8f7637763..fa9054bb93e9a 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -102,7 +102,6 @@ JL_DLLEXPORT int32_t jl_nb_available(ios_t *s)
 
 // --- dir/file stuff ---
 
-JL_DLLEXPORT int jl_sizeof_uv_fs_t(void) { return sizeof(uv_fs_t); }
 JL_DLLEXPORT char *jl_uv_fs_t_ptr(uv_fs_t *req) { return (char*)req->ptr; }
 JL_DLLEXPORT char *jl_uv_fs_t_path(uv_fs_t *req) { return (char*)req->path; }
 
@@ -478,25 +477,10 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT int jl_effective_threads(void) JL_NOTSAFEPOINT
 {
-    int cpu = jl_cpu_threads();
-    int masksize = uv_cpumask_size();
-    if (masksize < 0 || jl_running_under_rr(0))
-        return cpu;
-    uv_thread_t tid = uv_thread_self();
-    char *cpumask = (char *)calloc(masksize, sizeof(char));
-    int err = uv_thread_getaffinity(&tid, cpumask, masksize);
-    if (err) {
-        free(cpumask);
-        jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err),
-                       err);
-        return cpu;
-    }
-    int n = 0;
-    for (size_t i = 0; i < masksize; i++) {
-        n += cpumask[i];
-    }
-    free(cpumask);
-    return n < cpu ? n : cpu;
+    // We want the more conservative estimate of the two.
+    int cpu_threads = jl_cpu_threads();
+    int available_parallelism = uv_available_parallelism();
+    return available_parallelism < cpu_threads ? available_parallelism : cpu_threads;
 }
 
 
@@ -787,26 +771,11 @@ JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT size_t jl_maxrss(void)
 {
-#if defined(_OS_WINDOWS_)
-    PROCESS_MEMORY_COUNTERS counter;
-    GetProcessMemoryInfo( GetCurrentProcess( ), &counter, sizeof(counter) );
-    return (size_t)counter.PeakWorkingSetSize;
-
-// FIXME: `rusage` is available on OpenBSD, DragonFlyBSD and NetBSD as well.
-//        All of them return `ru_maxrss` in kilobytes.
-#elif defined(_OS_LINUX_) || defined(_OS_DARWIN_) || defined (_OS_FREEBSD_) || defined (_OS_OPENBSD_)
-    struct rusage rusage;
-    getrusage( RUSAGE_SELF, &rusage );
-
-#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_) || defined (_OS_OPENBSD_)
-    return (size_t)(rusage.ru_maxrss * 1024);
-#else
-    return (size_t)rusage.ru_maxrss;
-#endif
-
-#else
-    return (size_t)0;
-#endif
+    uv_rusage_t rusage;
+    if (uv_getrusage(&rusage) == 0) {
+        return rusage.ru_maxrss * 1024;
+    }
+    return 0;
 }
 
 // Simple `rand()` like function, with global seed and added thread-safety
diff --git a/src/task.c b/src/task.c
index 32b03028de2b1..33904d8512a55 100644
--- a/src/task.c
+++ b/src/task.c
@@ -49,27 +49,27 @@ extern "C" {
 // c.f. interceptor in jl_dlopen as well
 void (*real_siglongjmp)(jmp_buf _Buf, int _Value) = NULL;
 #endif
-static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) {
+static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) {
     if (to->copy_stack)
-        __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize);
+        __sanitizer_start_switch_fiber(&from->asan_fake_stack, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize);
     else
-        __sanitizer_start_switch_fiber(&from->ctx.asan_fake_stack, to->stkbuf, to->bufsz);
+        __sanitizer_start_switch_fiber(&from->asan_fake_stack, to->stkbuf, to->bufsz);
 }
-static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) {
+static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) {
     if (to->copy_stack)
-        __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase-ptls->stacksize, ptls->stacksize);
+        __sanitizer_start_switch_fiber(NULL, (char*)ptls->stackbase - ptls->stacksize, ptls->stacksize);
     else
         __sanitizer_start_switch_fiber(NULL, to->stkbuf, to->bufsz);
 }
-static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) {
-    __sanitizer_finish_switch_fiber(current->ctx.asan_fake_stack, NULL, NULL);
+static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) {
+    __sanitizer_finish_switch_fiber(current->asan_fake_stack, NULL, NULL);
         //(const void**)&last->stkbuf,
         //&last->bufsz);
 }
 #else
-static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT {}
-static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_task_t *to) JL_NOTSAFEPOINT {}
-static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *current) JL_NOTSAFEPOINT {}
+static inline void sanitizer_start_switch_fiber(jl_ptls_t ptls, jl_ucontext_t *from, jl_ucontext_t *to) JL_NOTSAFEPOINT {}
+static inline void sanitizer_start_switch_fiber_killed(jl_ptls_t ptls, jl_ucontext_t *to) JL_NOTSAFEPOINT {}
+static inline void sanitizer_finish_switch_fiber(jl_ucontext_t *last, jl_ucontext_t *current) JL_NOTSAFEPOINT {}
 #endif
 
 #if defined(_COMPILER_TSAN_ENABLED_)
@@ -85,19 +85,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
         jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
         __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
     } while (0)
-#ifdef COPY_STACKS
-#define tsan_destroy_copyctx(_ptls, _ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \
-            __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \
-        } \
-        _tsan_macro_ctx->tsan_state = NULL; \
-    } while (0)
-#define tsan_switch_to_copyctx(_ctx) do { \
-        struct jl_stack_context_t *_tsan_macro_ctx = (_ctx); \
-        __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
-    } while (0)
-#endif
 #else
 // just do minimal type-checking on the arguments
 #define tsan_destroy_ctx(_ptls, _ctx) do { \
@@ -108,16 +95,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
         jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
         (void)_tsan_macro_ctx; \
     } while (0)
-#ifdef COPY_STACKS
-#define tsan_destroy_copyctx(_ptls, _ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        (void)_tsan_macro_ctx; \
-    } while (0)
-#define tsan_switch_to_copyctx(_ctx) do { \
-        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
-        (void)_tsan_macro_ctx; \
-    } while (0)
-#endif
 #endif
 
 // empirically, jl_finish_task needs about 64k stack space to infer/run
@@ -134,7 +111,6 @@ static inline void sanitizer_finish_switch_fiber(jl_task_t *last, jl_task_t *cur
 #define ROOT_TASK_STACK_ADJUSTMENT 3000000
 #endif
 
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT;
 static void jl_set_fiber(jl_ucontext_t *t);
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t);
 static void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t);
@@ -214,17 +190,17 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt
     assert(stackbase > frame_addr);
     size_t nb = stackbase - frame_addr;
     void *buf;
-    if (lastt->bufsz < nb) {
-        asan_free_copy_stack(lastt->stkbuf, lastt->bufsz);
+    if (lastt->ctx.bufsz < nb) {
+        asan_free_copy_stack(lastt->ctx.stkbuf, lastt->ctx.bufsz);
         buf = (void*)jl_gc_alloc_buf(ptls, nb);
-        lastt->stkbuf = buf;
-        lastt->bufsz = nb;
+        lastt->ctx.stkbuf = buf;
+        lastt->ctx.bufsz = nb;
     }
     else {
-        buf = lastt->stkbuf;
+        buf = lastt->ctx.stkbuf;
     }
     *pt = NULL; // clear the gc-root for the target task before copying the stack for saving
-    lastt->copy_stack = nb;
+    lastt->ctx.copy_stack = nb;
     lastt->sticky = 1;
     memcpy_stack_a16((uint64_t*)buf, (uint64_t*)frame_addr, nb);
     // this task's stack could have been modified after
@@ -233,58 +209,101 @@ static void NOINLINE save_stack(jl_ptls_t ptls, jl_task_t *lastt, jl_task_t **pt
     jl_gc_wb_back(lastt);
 }
 
-JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, char *p)
+JL_NO_ASAN static void NOINLINE JL_NORETURN restore_stack(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
 {
     size_t nb = t->copy_stack;
     char *_x = (char*)ptls->stackbase - nb;
     if (!p) {
         // switch to a stackframe that's beyond the bounds of the last switch
-        p = _x;
-        if ((char*)&_x > _x) {
-            p = (char*)alloca((char*)&_x - _x);
+        p = _x - 4096;
+        if ((char*)&_x > p) {
+            p = (char*)alloca((char*)&_x - p);
         }
         restore_stack(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
     }
     void *_y = t->stkbuf;
     assert(_x != NULL && _y != NULL);
+#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround
+#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
+    void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
+    assert(*return_address == __builtin_return_address(0));
+    *return_address = NULL;
+#else
+#pragma message("warning: CFI_NORETURN not implemented for this platform, so profiling of copy_stacks may segfault in this build")
+#endif
+#else
+CFI_NORETURN
+#endif
     memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
 
 #if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx.copy_ctx);
+    jl_setcontext(t->copy_ctx);
 #else
-    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+    jl_longjmp(t->copy_ctx->uc_mcontext, 1);
 #endif
     abort(); // unreachable
 }
 
-JL_NO_ASAN static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt)
+JL_NO_ASAN static void restore_stack2(jl_ucontext_t *t, jl_ptls_t ptls, jl_ucontext_t *lastt)
 {
     assert(t->copy_stack && !lastt->copy_stack);
     size_t nb = t->copy_stack;
-    char *_x = (char*)ptls->stackbase - nb;
-    void *_y = t->stkbuf;
-    assert(_x != NULL && _y != NULL);
-    memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
+    if (nb > 1) {
+        char *_x = (char*)ptls->stackbase - nb;
+        void *_y = t->stkbuf;
+        assert(_x != NULL && _y != NULL);
+        memcpy_stack_a16((uint64_t*)_x, (uint64_t*)_y, nb);
+    }
+#if defined(_OS_WINDOWS_)
+    // jl_swapcontext and setjmp are the same on Windows, so we can just use swapcontext directly
+    tsan_switch_to_ctx(t);
+    jl_swapcontext(lastt->ctx, t->copy_ctx);
+#else
 #if defined(JL_HAVE_UNW_CONTEXT)
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx.ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
-#elif defined(JL_HAVE_ASM) || defined(_OS_WINDOWS_)
-    if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0))
+#elif defined(JL_HAVE_ASM)
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
 #else
 #error COPY_STACKS is incompatible with this platform
 #endif
-    tsan_switch_to_copyctx(&t->ctx);
-#if defined(_OS_WINDOWS_)
-    jl_setcontext(&t->ctx.copy_ctx);
+    tsan_switch_to_ctx(t);
+    jl_longjmp(t->copy_ctx->uc_mcontext, 1);
+#endif
+}
+
+JL_NO_ASAN static void NOINLINE restore_stack3(jl_ucontext_t *t, jl_ptls_t ptls, char *p)
+{
+#if !defined(JL_HAVE_ASM)
+    char *_x = (char*)ptls->stackbase;
+    if (!p) {
+        // switch to a stackframe that's well beyond the bounds of the next switch
+        p = _x - 4096;
+        if ((char*)&_x > p) {
+            p = (char*)alloca((char*)&_x - p);
+        }
+        restore_stack3(t, ptls, p); // pass p to ensure the compiler can't tailcall this or avoid the alloca
+    }
+#endif
+#if defined(_OS_WINDOWS_) // this platform does not implement CFI_NORETURN correctly or at all in libunwind (or equivalent) which requires a workaround
+#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
+    void *volatile *return_address = (void *volatile *)__builtin_frame_address(0) + 1;
+    assert(*return_address == __builtin_return_address(0));
+    *return_address = NULL;
+#endif
 #else
-    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+CFI_NORETURN
 #endif
+    tsan_switch_to_ctx(t);
+    jl_start_fiber_set(t); // (doesn't return)
+    abort();
 }
+
 #endif
 
 /* Rooted by the base module */
@@ -298,9 +317,9 @@ void JL_NORETURN jl_finish_task(jl_task_t *ct)
         jl_atomic_store_release(&ct->_state, JL_TASK_STATE_FAILED);
     else
         jl_atomic_store_release(&ct->_state, JL_TASK_STATE_DONE);
-    if (ct->copy_stack) { // early free of stkbuf
-        asan_free_copy_stack(ct->stkbuf, ct->bufsz);
-        ct->stkbuf = NULL;
+    if (ct->ctx.copy_stack) { // early free of stkbuf
+        asan_free_copy_stack(ct->ctx.stkbuf, ct->ctx.bufsz);
+        ct->ctx.stkbuf = NULL;
     }
     // ensure that state is cleared
     ct->ptls->in_finalizer = 0;
@@ -344,33 +363,33 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid
     if (ptls2) {
         *ptid = jl_atomic_load_relaxed(&task->tid);
 #ifdef COPY_STACKS
-        if (task->copy_stack) {
+        if (task->ctx.copy_stack) {
             *size = ptls2->stacksize;
             return (char *)ptls2->stackbase - *size;
         }
 #endif
     }
-    *size = task->bufsz - off;
-    return (void *)((char *)task->stkbuf + off);
+    *size = task->ctx.bufsz - off;
+    return (void *)((char *)task->ctx.stkbuf + off);
 }
 
 JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
                                        char **active_start, char **active_end,
                                        char **total_start, char **total_end)
 {
-    if (!task->started) {
+    if (!task->ctx.started) {
         *total_start = *active_start = 0;
         *total_end = *active_end = 0;
         return;
     }
 
     jl_ptls_t ptls2 = task->ptls;
-    if (task->copy_stack && ptls2) {
+    if (task->ctx.copy_stack && ptls2) {
         *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize;
         *total_end = *active_end = (char*)ptls2->stackbase;
     }
-    else if (task->stkbuf) {
-        *total_start = *active_start = (char*)task->stkbuf;
+    else if (task->ctx.stkbuf) {
+        *total_start = *active_start = (char*)task->ctx.stkbuf;
 #ifndef _OS_WINDOWS_
         jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
         if (ptls0->root_task == task) {
@@ -383,12 +402,12 @@ JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
         }
 #endif
 
-        *total_end = *active_end = (char*)task->stkbuf + task->bufsz;
+        *total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz;
 #ifdef COPY_STACKS
         // save_stack stores the stack of an inactive task in stkbuf, and the
         // actual number of used bytes in copy_stack.
-        if (task->copy_stack > 1)
-            *active_end = (char*)task->stkbuf + task->copy_stack;
+        if (task->ctx.copy_stack > 1)
+            *active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack;
 #endif
     }
     else {
@@ -449,20 +468,16 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
 #endif
 
     int killed = jl_atomic_load_relaxed(&lastt->_state) != JL_TASK_STATE_RUNNABLE;
-    if (!t->started && !t->copy_stack) {
+    if (!t->ctx.started && !t->ctx.copy_stack) {
         // may need to allocate the stack
-        if (t->stkbuf == NULL) {
-            t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
-            if (t->stkbuf == NULL) {
+        if (t->ctx.stkbuf == NULL) {
+            t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t);
+            if (t->ctx.stkbuf == NULL) {
 #ifdef COPY_STACKS
                 // fall back to stack copying if mmap fails
-                t->copy_stack = 1;
+                t->ctx.copy_stack = 1;
+                t->ctx.bufsz = 0;
                 t->sticky = 1;
-                t->bufsz = 0;
-                if (always_copy_stacks)
-                    memcpy(&t->ctx.copy_ctx, &ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
-                else
-                    memcpy(&t->ctx.ctx, &ptls->base_ctx, sizeof(t->ctx.ctx));
 #else
                 jl_throw(jl_memory_exception);
 #endif
@@ -470,28 +485,45 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
         }
     }
 
+    union {
+        _jl_ucontext_t ctx;
+        jl_stack_context_t copy_ctx;
+    } lasttstate;
+
     if (killed) {
         *pt = NULL; // can't fail after here: clear the gc-root for the target task now
         lastt->gcstack = NULL;
         lastt->eh = NULL;
-        if (!lastt->copy_stack && lastt->stkbuf) {
+        if (!lastt->ctx.copy_stack && lastt->ctx.stkbuf) {
             // early free of stkbuf back to the pool
             jl_release_task_stack(ptls, lastt);
         }
     }
     else {
+        if (lastt->ctx.copy_stack) { // save the old copy-stack
+#ifdef _OS_WINDOWS_
+            lasttstate.copy_ctx.uc_stack.ss_sp = (char*)ptls->stackbase - ptls->stacksize;
+            lasttstate.copy_ctx.uc_stack.ss_size = ptls->stacksize;
+#endif
 #ifdef COPY_STACKS
-        if (lastt->copy_stack) { // save the old copy-stack
-            save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
-            if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) {
-                sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task));
-                // TODO: mutex unlock the thread we just switched from
+            if (jl_setjmp(lasttstate.copy_ctx.uc_mcontext, 0)) {
+#ifdef MIGRATE_TASKS
+                ptls = lastt->ptls;
+#endif
+                lastt->ctx.copy_ctx = NULL;
+                sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx);
                 return;
             }
-        }
-        else
+            save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
+            lastt->ctx.copy_ctx = &lasttstate.copy_ctx;
+#else
+            abort();
 #endif
-        *pt = NULL; // can't fail after here: clear the gc-root for the target task now
+        }
+        else {
+            *pt = NULL; // can't fail after here: clear the gc-root for the target task now
+            lastt->ctx.ctx = &lasttstate.ctx;
+        }
     }
 
     // set up global state for new task and clear global state for old task
@@ -506,41 +538,44 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
     ptls->previous_task = lastt;
 #endif
 
-    if (t->started) {
+    if (t->ctx.started) {
+        if (t->ctx.copy_stack) {
 #ifdef COPY_STACKS
-        if (t->copy_stack) {
-            if (lastt->copy_stack) {
+            if (lastt->ctx.copy_stack) {
                 // Switching from copystack to copystack. Clear any shadow stack
                 // memory above the saved shadow stack.
-                uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->copy_stack;
+                uintptr_t stacktop = (uintptr_t)ptls->stackbase - t->ctx.copy_stack;
                 uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
                 if (stackbottom < stacktop)
-                    asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+                    asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
             }
-            if (!killed && !lastt->copy_stack) {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
-                restore_stack2(t, ptls, lastt);
-            } else {
-                tsan_switch_to_copyctx(&t->ctx);
+            if (!killed && !lastt->ctx.copy_stack) {
+                sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+                restore_stack2(&t->ctx, ptls, &lastt->ctx); // half jl_swap_fiber and half restore_stack
+            }
+            else {
+                tsan_switch_to_ctx(&t->ctx);
                 if (killed) {
-                    sanitizer_start_switch_fiber_killed(ptls, t);
-                    tsan_destroy_copyctx(ptls, &lastt->ctx);
-                } else {
-                    sanitizer_start_switch_fiber(ptls, lastt, t);
+                    sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
+                    tsan_destroy_ctx(ptls, &lastt->ctx);
+                }
+                else {
+                    sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
                 }
 
-                if (lastt->copy_stack) {
-                    restore_stack(t, ptls, NULL); // (doesn't return)
+                if (lastt->ctx.copy_stack) {
+                    restore_stack(&t->ctx, ptls, NULL); // (doesn't return)
+                    abort();
                 }
                 else {
-                    restore_stack(t, ptls, (char*)1); // (doesn't return)
+                    restore_stack(&t->ctx, ptls, (char*)1); // (doesn't return)
+                    abort();
                 }
             }
-        }
-        else
 #endif
-        {
-            if (lastt->copy_stack) {
+        }
+        else {
+            if (lastt->ctx.copy_stack) {
                 // Switching away from a copystack to a non-copystack. Clear
                 // the whole shadow stack now, because otherwise we won't know
                 // how much stack memory to clear the next time we switch to
@@ -549,22 +584,23 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
                 uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
                 // We're not restoring the stack, but we still need to unpoison the
                 // stack, so it starts with a pristine stack.
-                asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+                asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
             }
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_switch_to_ctx(&t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_set_fiber(&t->ctx); // (doesn't return)
                 abort(); // unreachable
             }
             else {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
-                if (lastt->copy_stack) {
+                sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+                if (lastt->ctx.copy_stack) {
                     // Resume at the jl_setjmp earlier in this function,
                     // don't do a full task swap
                     tsan_switch_to_ctx(&t->ctx);
                     jl_set_fiber(&t->ctx); // (doesn't return)
+                    abort();
                 }
                 else {
                     jl_swap_fiber(&lastt->ctx, &t->ctx);
@@ -573,41 +609,58 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
         }
     }
     else {
-        if (lastt->copy_stack) {
+#ifdef _COMPILER_TSAN_ENABLED_
+        t->ctx.tsan_state = __tsan_create_fiber(0);
+#endif
+        if (lastt->ctx.copy_stack) {
             uintptr_t stacktop = (uintptr_t)ptls->stackbase;
             uintptr_t stackbottom = ((uintptr_t)jl_get_frame_addr() & ~15);
             // We're not restoring the stack, but we still need to unpoison the
             // stack, so it starts with a pristine stack.
-            asan_unpoison_stack_memory(stackbottom, stacktop-stackbottom);
+            asan_unpoison_stack_memory(stackbottom, stacktop - stackbottom);
         }
-        if (t->copy_stack && always_copy_stacks) {
+        if (t->ctx.copy_stack) {
+#ifdef COPY_STACKS
             tsan_switch_to_ctx(&t->ctx);
+            // create a temporary non-copy_stack context for starting this fiber
+            jl_ucontext_t ctx = t->ctx;
+            ctx.ctx = NULL;
+            ctx.stkbuf = (char*)ptls->stackbase - ptls->stacksize;
+            ctx.bufsz = ptls->stacksize;
+            ctx.copy_stack = 0;
+            ctx.started = 0;
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
-            } else {
-                sanitizer_start_switch_fiber(ptls, lastt, t);
+                if (lastt->ctx.copy_stack)
+                    restore_stack3(&ctx, ptls, NULL); // (doesn't return)
+                else
+                    jl_start_fiber_set(&ctx);
+                abort();
+            }
+            sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+            if (lastt->ctx.copy_stack) {
+                restore_stack3(&ctx, ptls, NULL); // (doesn't return)
+                abort();
+            }
+            else {
+                jl_start_fiber_swap(&lastt->ctx, &ctx);
             }
-#ifdef COPY_STACKS
-#if defined(_OS_WINDOWS_)
-            jl_setcontext(&t->ctx.copy_ctx);
 #else
-            jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+            abort();
 #endif
-#endif
-            abort(); // unreachable
         }
         else {
             if (killed) {
-                sanitizer_start_switch_fiber_killed(ptls, t);
+                sanitizer_start_switch_fiber_killed(ptls, &t->ctx);
                 tsan_switch_to_ctx(&t->ctx);
                 tsan_destroy_ctx(ptls, &lastt->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
             }
-            sanitizer_start_switch_fiber(ptls, lastt, t);
-            if (lastt->copy_stack) {
-                // Resume at the jl_setjmp earlier in this function
+            sanitizer_start_switch_fiber(ptls, &lastt->ctx, &t->ctx);
+            if (lastt->ctx.copy_stack) {
+                // copy_stack resumes at the jl_setjmp earlier in this function, so don't swap here
                 tsan_switch_to_ctx(&t->ctx);
                 jl_start_fiber_set(&t->ctx); // (doesn't return)
                 abort();
@@ -617,7 +670,14 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
             }
         }
     }
-    sanitizer_finish_switch_fiber(ptls->previous_task, jl_atomic_load_relaxed(&ptls->current_task));
+
+#ifdef MIGRATE_TASKS
+    ptls = lastt->ptls;
+#endif
+    assert(ptls);
+    assert(lastt == jl_atomic_load_relaxed(&ptls->current_task));
+    lastt->ctx.ctx = NULL;
+    sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &lastt->ctx);
 }
 
 JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
@@ -629,7 +689,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
         return;
     }
     int8_t gc_state = jl_gc_unsafe_enter(ptls);
-    if (t->started && t->stkbuf == NULL)
+    if (t->ctx.started && t->ctx.stkbuf == NULL)
         jl_error("attempt to switch to exited task");
     if (ptls->in_finalizer)
         jl_error("task switch not allowed from inside gc finalizer");
@@ -654,7 +714,7 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
     ptls->previous_task = NULL;
     assert(t != ct);
     assert(jl_atomic_load_relaxed(&t->tid) == ptls->tid);
-    if (!t->sticky && !t->copy_stack)
+    if (!t->sticky && !t->ctx.copy_stack)
         jl_atomic_store_release(&t->tid, -1);
 #else
     assert(ptls == ct->ptls);
@@ -711,48 +771,31 @@ JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e, jl_task_t *ct)
 #define pop_timings_stack() /* Nothing */
 #endif
 
-#define throw_internal_body(altstack)                                          \
-    assert(!jl_get_safe_restore());                                            \
-    jl_ptls_t ptls = ct->ptls;                                                 \
-    ptls->io_wait = 0;                                                         \
-    jl_gc_unsafe_enter(ptls);                                                  \
-    if (exception) {                                                           \
-        /* The temporary ptls->bt_data is rooted by special purpose code in the\
-           GC. This exists only for the purpose of preserving bt_data until we \
-           set ptls->bt_size=0 below. */                                       \
-        jl_push_excstack(ct, &ct->excstack, exception,                         \
-                         ptls->bt_data, ptls->bt_size);                        \
-        ptls->bt_size = 0;                                                     \
-    }                                                                          \
-    assert(ct->excstack && ct->excstack->top);                                 \
-    jl_handler_t *eh = ct->eh;                                                 \
-    if (eh != NULL) {                                                          \
-        if (altstack) ptls->sig_exception = NULL;                              \
-        pop_timings_stack()                                                    \
-        asan_unpoison_task_stack(ct, &eh->eh_ctx);                             \
-        jl_longjmp(eh->eh_ctx, 1);                                             \
-    }                                                                          \
-    else {                                                                     \
-        jl_no_exc_handler(exception, ct);                                      \
-    }                                                                          \
-    assert(0);
-
 static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED)
 {
-CFI_NORETURN
     JL_GC_PUSH1(&exception);
-    throw_internal_body(0);
-    jl_unreachable();
-}
-
-/* On the signal stack, we don't want to create any asan frames, but we do on the
-   normal, stack, so we split this function in two, depending on which context
-   we're calling it in. This also lets us avoid making a GC frame on the altstack,
-   which might end up getting corrupted if we recur here through another signal. */
-JL_NO_ASAN static void JL_NORETURN throw_internal_altstack(jl_task_t *ct, jl_value_t *exception)
-{
-CFI_NORETURN
-    throw_internal_body(1);
+    jl_ptls_t ptls = ct->ptls;
+    ptls->io_wait = 0;
+    jl_gc_unsafe_enter(ptls);
+    if (exception) {
+        /* The temporary ptls->bt_data is rooted by special purpose code in the\
+           GC. This exists only for the purpose of preserving bt_data until we
+           set ptls->bt_size=0 below. */
+        jl_push_excstack(ct, &ct->excstack, exception,
+                         ptls->bt_data, ptls->bt_size);
+        ptls->bt_size = 0;
+    }
+    assert(ct->excstack && ct->excstack->top);
+    jl_handler_t *eh = ct->eh;
+    if (eh != NULL) {
+        pop_timings_stack()
+        asan_unpoison_task_stack(ct, &eh->eh_ctx);
+        jl_longjmp(eh->eh_ctx, 1);
+    }
+    else {
+        jl_no_exc_handler(exception, ct);
+    }
+    assert(0);
     jl_unreachable();
 }
 
@@ -782,24 +825,6 @@ JL_DLLEXPORT void jl_rethrow(void)
     throw_internal(ct, NULL);
 }
 
-// Special case throw for errors detected inside signal handlers.  This is not
-// (cannot be) called directly in the signal handler itself, but is returned to
-// after the signal handler exits.
-JL_DLLEXPORT JL_NO_ASAN void JL_NORETURN jl_sig_throw(void)
-{
-CFI_NORETURN
-    jl_jmp_buf *safe_restore = jl_get_safe_restore();
-    jl_task_t *ct = jl_current_task;
-    if (safe_restore) {
-        asan_unpoison_task_stack(ct, safe_restore);
-        jl_longjmp(*safe_restore, 1);
-    }
-    jl_ptls_t ptls = ct->ptls;
-    jl_value_t *e = ptls->sig_exception;
-    JL_GC_PROMISE_ROOTED(e);
-    throw_internal_altstack(ct, e);
-}
-
 JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED)
 {
     // TODO: Should uses of `rethrow(exc)` be replaced with a normal throw, now
@@ -1071,26 +1096,28 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type);
     jl_set_typetagof(t, jl_task_tag, 0);
     JL_PROBE_RT_NEW_TASK(ct, t);
-    t->copy_stack = 0;
+    t->ctx.copy_stack = 0;
     if (ssize == 0) {
         // stack size unspecified; use default
         if (always_copy_stacks) {
-            t->copy_stack = 1;
-            t->bufsz = 0;
+            t->ctx.copy_stack = 1;
+            t->ctx.bufsz = 0;
         }
         else {
-            t->bufsz = JL_STACK_SIZE;
+            t->ctx.bufsz = JL_STACK_SIZE;
         }
-        t->stkbuf = NULL;
+        t->ctx.stkbuf = NULL;
     }
     else {
         // user requested dedicated stack of a certain size
         if (ssize < MINSTKSZ)
             ssize = MINSTKSZ;
-        t->bufsz = ssize;
-        t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
-        if (t->stkbuf == NULL)
+        t->ctx.bufsz = ssize;
+        t->ctx.stkbuf = jl_malloc_stack(&t->ctx.bufsz, t);
+        if (t->ctx.stkbuf == NULL) {
+            t->ctx.bufsz = 0;
             jl_throw(jl_memory_exception);
+        }
     }
     t->next = jl_nothing;
     t->queue = jl_nothing;
@@ -1109,30 +1136,21 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
     t->sticky = 1;
     t->gcstack = NULL;
     t->excstack = NULL;
-    t->started = 0;
+    t->ctx.started = 0;
     t->priority = 0;
-    jl_atomic_store_relaxed(&t->tid, t->copy_stack ? jl_atomic_load_relaxed(&ct->tid) : -1); // copy_stacks are always pinned since they can't be moved
+    jl_atomic_store_relaxed(&t->tid, -1);
     t->threadpoolid = ct->threadpoolid;
     t->ptls = NULL;
     t->world_age = ct->world_age;
     t->reentrant_timing = 0;
     jl_timing_task_init(t);
 
-#ifdef COPY_STACKS
-    if (!t->copy_stack) {
-#if defined(JL_DEBUG_BUILD)
-        memset(&t->ctx, 0, sizeof(t->ctx));
-#endif
-    }
-    else {
-        if (always_copy_stacks)
-            memcpy(&t->ctx.copy_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
-        else
-            memcpy(&t->ctx.ctx, &ct->ptls->base_ctx, sizeof(t->ctx.ctx));
-    }
-#endif
+    if (t->ctx.copy_stack)
+        t->ctx.copy_ctx = NULL;
+    else
+        t->ctx.ctx = NULL;
 #ifdef _COMPILER_TSAN_ENABLED_
-    t->ctx.tsan_state = __tsan_create_fiber(0);
+    t->ctx.tsan_state = NULL;
 #endif
 #ifdef _COMPILER_ASAN_ENABLED_
     t->ctx.asan_fake_stack = NULL;
@@ -1196,7 +1214,7 @@ CFI_NORETURN
     jl_task_t *ct = jl_current_task;
 #endif
     jl_ptls_t ptls = ct->ptls;
-    sanitizer_finish_switch_fiber(ptls->previous_task, ct);
+    sanitizer_finish_switch_fiber(&ptls->previous_task->ctx, &ct->ctx);
     _start_task();
 }
 
@@ -1211,6 +1229,7 @@ CFI_NORETURN
     jl_task_t *ct = jl_current_task;
     JL_GC_PUSH1(&ct);
 #endif
+    ct->ctx.ctx = NULL;
     jl_ptls_t ptls = ct->ptls;
     jl_value_t *res;
     assert(ptls->finalizers_inhibited == 0);
@@ -1218,11 +1237,11 @@ CFI_NORETURN
 #ifdef MIGRATE_TASKS
     jl_task_t *pt = ptls->previous_task;
     ptls->previous_task = NULL;
-    if (!pt->sticky && !pt->copy_stack)
+    if (!pt->sticky && !pt->ctx.copy_stack)
         jl_atomic_store_release(&pt->tid, -1);
 #endif
 
-    ct->started = 1;
+    ct->ctx.started = 1;
     JL_PROBE_RT_START_TASK(ct);
     jl_timing_block_task_enter(ct, ptls, NULL);
     if (jl_atomic_load_relaxed(&ct->_isexception)) {
@@ -1260,64 +1279,52 @@ skip_pop_exception:;
 #ifdef _OS_WINDOWS_
 #define setcontext jl_setcontext
 #define swapcontext jl_swapcontext
-#define makecontext jl_makecontext
 #endif
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
+static int make_fiber(jl_ucontext_t *t, _jl_ucontext_t *ctx)
 {
 #ifndef _OS_WINDOWS_
-    int r = getcontext(t);
-    if (r != 0)
-        jl_error("getcontext failed");
+    int r = getcontext(ctx);
+    if (r != 0) abort();
 #endif
-    void *stk = jl_malloc_stack(ssize, owner);
-    if (stk == NULL)
-        return NULL;
-    t->uc_stack.ss_sp = stk;
-    t->uc_stack.ss_size = *ssize;
+    ctx->uc_stack.ss_sp = (char*)t->stkbuf;
+    ctx->uc_stack.ss_size = t->bufsz;
 #ifdef _OS_WINDOWS_
-    makecontext(t, &start_task);
+    jl_makecontext(ctx, &start_task);
 #else
-    t->uc_link = NULL;
-    makecontext(t, &start_task, 0);
+    ctx->uc_link = NULL;
+    makecontext(ctx, &start_task, 0);
 #endif
-    return (char*)stk;
+    return 1;
 }
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    setcontext(&t->ctx);
+    _jl_ucontext_t ctx;
+    make_fiber(t, &ctx);
+    setcontext(&ctx);
 }
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
+    _jl_ucontext_t ctx;
+    make_fiber(t, &ctx);
     assert(lastt);
     tsan_switch_to_ctx(t);
-    swapcontext(&lastt->ctx, &t->ctx);
+    swapcontext(lastt->ctx, &ctx);
 }
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     tsan_switch_to_ctx(t);
-    swapcontext(&lastt->ctx, &t->ctx);
+    swapcontext(lastt->ctx, t->ctx);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    setcontext(&t->ctx);
-}
-#endif
-
-#if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM)
-static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
-{
-    char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
-    if (stkbuf == NULL)
-        return NULL;
-#ifndef __clang_gcanalyzer__
-    ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber
-    ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber
-#endif
-    return stkbuf;
+    setcontext(t->ctx);
 }
 #endif
 
 #if defined(JL_HAVE_UNW_CONTEXT)
+#ifdef _OS_WINDOWS_
+#error unw_context_t not defined in Windows
+#endif
 static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 {
     volatile int returns = 0;
@@ -1331,15 +1338,15 @@ static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, &t->ctx);
+    int r = unw_init_local(&c, t->ctx);
     if (r < 0)
         abort();
-    jl_unw_swapcontext(&lastt->ctx, &c);
+    jl_unw_swapcontext(lastt->ctx, &c);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    int r = unw_init_local(&c, &t->ctx);
+    int r = unw_init_local(&c, t->ctx);
     if (r < 0)
         abort();
     unw_resume(&c);
@@ -1347,14 +1354,14 @@ static void jl_set_fiber(jl_ucontext_t *t)
 #elif defined(JL_HAVE_ASM)
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
     tsan_switch_to_ctx(t);
     jl_set_fiber(t); // doesn't return
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    jl_longjmp(t->ctx.uc_mcontext, 1);
+    jl_longjmp(t->ctx->uc_mcontext, 1);
 }
 #endif
 
@@ -1375,14 +1382,14 @@ static void jl_set_fiber(jl_ucontext_t *t)
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
     unw_cursor_t c;
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
-    int r = unw_getcontext(&t->ctx);
+    int r = unw_getcontext(t->ctx);
     if (r)
         abort();
-    if (unw_init_local(&c, &t->ctx))
+    if (unw_init_local(&c, t->ctx))
         abort();
     PUSH_RET(&c, stk);
 #if defined __linux__
@@ -1398,43 +1405,46 @@ static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
     unw_cursor_t c;
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
-    r = unw_getcontext(&t->ctx);
+    r = unw_getcontext(t->ctx);
     if (r != 0)
         abort();
-    if (unw_init_local(&c, &t->ctx))
+    if (unw_init_local(&c, t->ctx))
         abort();
     PUSH_RET(&c, stk);
     if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk))
         abort();
     if (unw_set_reg(&c, UNW_REG_IP, fn))
         abort();
-    jl_unw_swapcontext(&lastt->ctx, &c);
+    jl_unw_swapcontext(lastt->ctx, &c);
 }
 #endif
 
 #if defined(JL_HAVE_ASM)
+#ifdef _OS_WINDOWS_
+#error JL_HAVE_ASM not defined in Windows
+#endif
 JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
 #ifdef JL_HAVE_UNW_CONTEXT
     volatile int returns = 0;
-    int r = unw_getcontext(&lastt->ctx);
+    int r = unw_getcontext(lastt->ctx);
     if (++returns == 2) // r is garbage after the first return
         return;
     if (r != 0 || returns != 1)
         abort();
 #else
-    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx->uc_mcontext, 0))
         return;
 #endif
     tsan_switch_to_ctx(t);
@@ -1442,8 +1452,9 @@ JL_NO_ASAN static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *
 }
 JL_NO_ASAN static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    char *stk = ((char**)&t->ctx)[0];
-    size_t ssize = ((size_t*)&t->ctx)[1];
+CFI_NORETURN
+    char *stk = (char*)t->stkbuf;
+    size_t ssize = t->bufsz;
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
 #ifdef _CPU_X86_64_
@@ -1541,14 +1552,14 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     }
 #endif
     if (always_copy_stacks) {
-        ct->copy_stack = 1;
-        ct->stkbuf = NULL;
-        ct->bufsz = 0;
+        ct->ctx.copy_stack = 1;
+        ct->ctx.stkbuf = NULL;
+        ct->ctx.bufsz = 0;
     }
     else {
-        ct->copy_stack = 0;
-        ct->stkbuf = stack;
-        ct->bufsz = ssize;
+        ct->ctx.copy_stack = 0;
+        ct->ctx.stkbuf = stack;
+        ct->ctx.bufsz = ssize;
     }
 
 #ifdef USE_TRACY
@@ -1556,7 +1567,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     strcpy(unique_string, "Root");
     ct->name = unique_string;
 #endif
-    ct->started = 1;
+    ct->ctx.started = 1;
     ct->next = jl_nothing;
     ct->queue = jl_nothing;
     ct->tls = jl_nothing;
@@ -1596,21 +1607,18 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
     if (always_copy_stacks) {
         // when this is set, we will attempt to corrupt the process stack to switch tasks,
         // although this is unreliable, and thus not recommended
-        ptls->stackbase = stack_hi;
-        ptls->stacksize = ssize;
-#ifdef _OS_WINDOWS_
-        ptls->copy_stack_ctx.uc_stack.ss_sp = stack_hi;
-        ptls->copy_stack_ctx.uc_stack.ss_size = ssize;
-#endif
-        if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0))
-            start_task(); // sanitizer_finish_switch_fiber is part of start_task
+        ptls->stackbase = jl_get_frame_addr();
+        ptls->stacksize =  (char*)ptls->stackbase - (char*)stack_lo;
     }
     else {
-        ssize = JL_STACK_SIZE;
-        char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
+        size_t bufsz = JL_STACK_SIZE;
+        void *stkbuf = jl_malloc_stack(&bufsz, NULL);
         if (stkbuf != NULL) {
-            ptls->stackbase = stkbuf + ssize;
-            ptls->stacksize = ssize;
+            ptls->stackbase = (char*)stkbuf + bufsz;
+            ptls->stacksize = bufsz;
+        }
+        else {
+            ptls->stacksize = 0;
         }
     }
 #endif
@@ -1623,7 +1631,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 
 JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT
 {
-    return t->started;
+    return t->ctx.started;
 }
 
 JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT
diff --git a/src/threading.c b/src/threading.c
index 40ab53c761a33..0cce66e695d34 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -74,6 +74,16 @@ JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
 
 JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
 {
+#ifdef _OS_DARWIN_
+    jl_task_t *ct = jl_get_current_task();
+    if (ct != NULL && ct->ptls) {
+        if (sr == NULL)
+            pthread_setspecific(jl_safe_restore_key, (void*)sr);
+        ct->ptls->safe_restore = sr;
+        if (sr == NULL)
+            return;
+    }
+#endif
     pthread_setspecific(jl_safe_restore_key, (void*)sr);
 }
 #endif
@@ -82,51 +92,17 @@ JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
 // The tls_states buffer:
 //
 // On platforms that do not use ELF (i.e. where `__thread` is emulated with
-// lower level API) (Mac, Windows), we use the platform runtime API to create
+// lower level API) (Windows), we use the platform runtime API to create
 // TLS variable directly.
 // This is functionally equivalent to using `__thread` but can be
 // more efficient since we can have better control over the creation and
 // initialization of the TLS buffer.
 //
-// On platforms that use ELF (Linux, FreeBSD), we use a `__thread` variable
+// On platforms that support native TLS (ELF platforms + Macos) we use a `__thread` variable
 // as the fallback in the shared object. For better efficiency, we also
 // create a `__thread` variable in the main executable using a static TLS
 // model.
-#if defined(_OS_DARWIN_)
-// Mac doesn't seem to have static TLS model so the runtime TLS getter
-// registration will only add overhead to TLS access. The `__thread` variables
-// are emulated with `pthread_key_t` so it is actually faster to use it directly.
-static pthread_key_t jl_pgcstack_key;
-
-__attribute__((constructor)) void jl_init_tls(void)
-{
-    pthread_key_create(&jl_pgcstack_key, NULL);
-}
-
-JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
-{
-    return (jl_gcframe_t**)pthread_getspecific(jl_pgcstack_key);
-}
-
-void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
-{
-    pthread_setspecific(jl_pgcstack_key, (void*)pgcstack);
-}
-
-void jl_pgcstack_getkey(jl_get_pgcstack_func **f, pthread_key_t *k)
-{
-    // for codegen
-    *f = pthread_getspecific;
-    *k = jl_pgcstack_key;
-}
-
-
-JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, pthread_key_t k)
-{
-    jl_safe_printf("ERROR: Attempt to change TLS address.\n");
-}
-
-#elif defined(_OS_WINDOWS_)
+#if defined(_OS_WINDOWS_)
 // Apparently windows doesn't have a static TLS model (or one that can be
 // reliably used from a shared library) either..... Use `TLSAlloc` instead.
 
@@ -338,6 +314,18 @@ JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT
     return -1; // everything else uses threadpool -1 (does not belong to any threadpool)
 }
 
+// get thread local rng
+JL_DLLEXPORT uint64_t jl_get_ptls_rng(void) JL_NOTSAFEPOINT
+{
+    return jl_current_task->ptls->rngseed;
+}
+
+// get thread local rng
+JL_DLLEXPORT void jl_set_ptls_rng(uint64_t new_seed) JL_NOTSAFEPOINT
+{
+    jl_current_task->ptls->rngseed = new_seed;
+}
+
 jl_ptls_t jl_init_threadtls(int16_t tid)
 {
 #ifndef _OS_WINDOWS_
@@ -352,9 +340,8 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
 #endif
     ptls->system_id = uv_thread_self();
     ptls->rngseed = jl_rand();
-    if (tid == 0) {
+    if (tid == 0)
         ptls->disable_gc = 1;
-    }
 #ifdef _OS_WINDOWS_
     if (tid == 0) {
         if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
@@ -405,6 +392,12 @@ jl_ptls_t jl_init_threadtls(int16_t tid)
     jl_fence();
     uv_mutex_unlock(&tls_lock);
 
+#if !defined(_OS_WINDOWS_) && !defined(JL_DISABLE_LIBUNWIND) && !defined(LLVMLIBUNWIND)
+    // ensures libunwind TLS space for this thread is allocated eagerly
+    // to make unwinding async-signal-safe even when using thread local caches.
+    unw_ensure_tls();
+#endif
+
     return ptls;
 }
 
@@ -459,8 +452,7 @@ void jl_safepoint_resume_all_threads(jl_task_t *ct)
 
 void jl_task_frame_noreturn(jl_task_t *ct) JL_NOTSAFEPOINT;
 void scheduler_delete_thread(jl_ptls_t ptls) JL_NOTSAFEPOINT;
-
-void jl_free_thread_gc_state(jl_ptls_t ptls);
+void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz) JL_NOTSAFEPOINT;
 
 static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
 {
@@ -489,7 +481,7 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
         }
         if (signal_stack != NULL) {
             if (signal_stack_size)
-                jl_free_stack(signal_stack, signal_stack_size);
+                _jl_free_stack(ptls ,signal_stack, signal_stack_size);
             else
                 free(signal_stack);
         }
@@ -535,7 +527,6 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER
 #else
     pthread_mutex_unlock(&in_signal_lock);
 #endif
-    jl_deinit_thread_heap(ptls);
     free(ptls->bt_data);
     small_arraylist_free(&ptls->locks);
     ptls->previous_exception = NULL;
@@ -769,7 +760,7 @@ void jl_init_threading(void)
     gc_first_tid = nthreads + nthreadsi;
 }
 
-static uv_barrier_t thread_init_done;
+uv_barrier_t thread_init_done;
 
 void jl_start_threads(void)
 {
@@ -808,30 +799,20 @@ void jl_start_threads(void)
     uv_barrier_init(&thread_init_done, nthreads);
 
     // GC/System threads need to be after the worker threads.
-    int nworker_threads = nthreads - ngcthreads;
+    int nmutator_threads = nthreads - ngcthreads;
 
-    for (i = 1; i < nthreads; ++i) {
+    for (i = 1; i < nmutator_threads; ++i) {
         jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
         t->tid = i;
         t->barrier = &thread_init_done;
-        if (i < nworker_threads) {
-            uv_thread_create(&uvtid, jl_threadfun, t);
-            if (exclusive) {
-                mask[i] = 1;
-                uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
-                mask[i] = 0;
-            }
-        }
-        else if (i == nthreads - 1 && jl_n_sweepthreads == 1) {
-            uv_thread_create(&uvtid, jl_concurrent_gc_threadfun, t);
-        }
-        else {
-            uv_thread_create(&uvtid, jl_parallel_gc_threadfun, t);
+        uv_thread_create(&uvtid, jl_threadfun, t);
+        if (exclusive) {
+            mask[i] = 1;
+            uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
+            mask[i] = 0;
         }
         uv_thread_detach(&uvtid);
     }
-
-    uv_barrier_wait(&thread_init_done);
 }
 
 _Atomic(unsigned) _threadedregion; // keep track of whether to prioritize IO or threading
diff --git a/src/threading.h b/src/threading.h
index 260ecffa30dd5..cb26537699713 100644
--- a/src/threading.h
+++ b/src/threading.h
@@ -12,6 +12,8 @@ extern "C" {
 
 #define PROFILE_JL_THREADING            0
 
+extern uv_barrier_t thread_init_done;
+
 extern _Atomic(jl_ptls_t*) jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */
 
 typedef struct _jl_threadarg_t {
diff --git a/src/timing.c b/src/timing.c
index 590e52b8d523d..265e50ad3dd74 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -6,7 +6,7 @@
 #include "options.h"
 #include "stdio.h"
 
-#if defined(USE_TRACY) || defined(USE_ITTAPI)
+#if defined(USE_TRACY) || defined(USE_ITTAPI) || defined(USE_NVTX)
 #define DISABLE_FREQUENT_EVENTS
 #endif
 
@@ -49,6 +49,10 @@ static arraylist_t jl_timing_ittapi_events;
 static jl_mutex_t jl_timing_ittapi_events_lock;
 #endif //USE_ITTAPI
 
+#ifdef USE_NVTX
+static nvtxDomainHandle_t jl_timing_nvtx_domain;
+#endif
+
 #ifdef USE_TIMING_COUNTS
 static int cmp_counts_events(const void *a, const void *b) {
     jl_timing_counts_event_t *event_a = *(jl_timing_counts_event_t **)a;
@@ -139,6 +143,13 @@ void jl_init_timing(void)
     qsort(jl_timing_subsystems, JL_TIMING_SUBSYSTEM_LAST,
           sizeof(const char *), indirect_strcmp);
 
+#ifdef USE_NVTX
+    jl_timing_nvtx_domain = nvtxDomainCreateA("julia");
+    for (int i = 0; i < JL_TIMING_SUBSYSTEM_LAST; i++) {
+        nvtxDomainNameCategoryA(jl_timing_nvtx_domain, i + 1, jl_timing_subsystems[i]);
+    }
+#endif
+
     int i __attribute__((unused)) = 0;
 #ifdef USE_ITTAPI
     i = 0;
@@ -317,6 +328,25 @@ JL_DLLEXPORT jl_timing_event_t *_jl_timing_event_create(const char *subsystem, c
     event->ittapi_event = _jl_timing_ittapi_event_create(name);
 #endif // USE_ITTAPI
 
+#ifdef USE_NVTX
+    nvtxEventAttributes_t nvtx_attrs = {0};
+    nvtx_attrs.version = NVTX_VERSION;
+    nvtx_attrs.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+
+    nvtxStringHandle_t nvtx_message = nvtxDomainRegisterStringA(jl_timing_nvtx_domain, name);
+    nvtx_attrs.messageType = NVTX_MESSAGE_TYPE_REGISTERED;
+    nvtx_attrs.message.registered = nvtx_message;
+
+    // 0 is the default (unnamed) category
+    nvtx_attrs.category = maybe_subsystem == JL_TIMING_SUBSYSTEM_LAST ? 0 : maybe_subsystem+1;
+
+    // simple Knuth hash to get nice colors
+    nvtx_attrs.colorType = NVTX_COLOR_ARGB;
+    nvtx_attrs.color = (nvtx_attrs.category * 2654435769) >> 8;
+
+    event->nvtx_attrs = nvtx_attrs;
+#endif // USE_NVTX
+
 #ifdef USE_TRACY
     event->tracy_srcloc.name = name;
     event->tracy_srcloc.function = function;
@@ -347,6 +377,7 @@ JL_DLLEXPORT void _jl_timing_block_start(jl_timing_block_t *block) {
     uint64_t t = cycleclock(); (void)t;
     _COUNTS_START(&block->counts_ctx, t);
     _ITTAPI_START(block);
+    _NVTX_START(block);
     _TRACY_START(block);
 
     jl_timing_block_t **prevp = &jl_current_task->ptls->timing_stack;
@@ -362,6 +393,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *block) {
     if (block->is_running) {
         uint64_t t = cycleclock(); (void)t;
         _ITTAPI_STOP(block);
+        _NVTX_STOP(block);
         _TRACY_STOP(block->tracy_ctx);
         _COUNTS_STOP(block, t);
 
diff --git a/src/timing.h b/src/timing.h
index 1cc82b67e2b6a..61118cc3b41ab 100644
--- a/src/timing.h
+++ b/src/timing.h
@@ -66,7 +66,7 @@ JL_DLLEXPORT void _jl_timing_block_end(jl_timing_block_t *cur_block);
 #define HAVE_TIMING_SUPPORT
 #endif
 
-#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_TIMING_COUNTS )
+#if defined( USE_TRACY ) || defined( USE_ITTAPI ) || defined( USE_NVTX ) || defined( USE_TIMING_COUNTS )
 #define ENABLE_TIMINGS
 #endif
 
@@ -115,6 +115,12 @@ typedef struct ___tracy_source_location_data TracySrcLocData;
 #include <ittapi/ittnotify.h>
 #endif
 
+#ifdef USE_NVTX
+#pragma GCC visibility push(default)
+#include <nvtx3/nvToolsExt.h>
+#pragma GCC visibility pop
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -276,6 +282,20 @@ typedef struct _jl_timing_counts_t {
 #define _ITTAPI_STOP(block)
 #endif
 
+
+#ifdef USE_NVTX
+#define _NVTX_EVENT_MEMBER              nvtxEventAttributes_t nvtx_attrs;
+#define _NVTX_BLOCK_MEMBER              nvtxRangeId_t nvtx_rangeid;
+#define _NVTX_START(block)              (block)->nvtx_rangeid = nvtxDomainRangeStartEx(jl_timing_nvtx_domain, &(block)->event->nvtx_attrs)
+#define _NVTX_STOP(block)               nvtxDomainRangeEnd(jl_timing_nvtx_domain, (block)->nvtx_rangeid)
+#else
+#define _NVTX_EVENT_MEMBER
+#define _NVTX_BLOCK_MEMBER
+#define _NVTX_START(block)
+#define _NVTX_STOP(block)
+#endif
+
+
 /**
  * Top-level jl_timing implementation
  **/
@@ -292,6 +312,7 @@ extern const char *jl_timing_subsystems[(int)JL_TIMING_SUBSYSTEM_LAST];
 struct _jl_timing_event_t { // typedef in julia.h
     _TRACY_EVENT_MEMBER
     _ITTAPI_EVENT_MEMBER
+    _NVTX_EVENT_MEMBER
     _COUNTS_EVENT_MEMBER
 
     int subsystem;
@@ -310,6 +331,7 @@ struct _jl_timing_block_t { // typedef in julia.h
 
     _TRACY_BLOCK_MEMBER
     _ITTAPI_BLOCK_MEMBER
+    _NVTX_BLOCK_MEMBER
     _COUNTS_BLOCK_MEMBER
 
     uint8_t is_running;
@@ -362,6 +384,12 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
 #define _ITTAPI_COUNTER_MEMBER
 #endif
 
+#ifdef USE_NVTX
+#define _NVTX_COUNTER_MEMBER void * __nvtx_null;
+#else
+#define _NVTX_COUNTER_MEMBER
+#endif
+
 #ifdef USE_TRACY
 # define _TRACY_COUNTER_MEMBER jl_tracy_counter_t tracy_counter;
 # else
@@ -376,6 +404,7 @@ STATIC_INLINE void _jl_timing_suspend_destroy(jl_timing_suspend_t *suspend) JL_N
 
 typedef struct {
     _ITTAPI_COUNTER_MEMBER
+    _NVTX_COUNTER_MEMBER
     _TRACY_COUNTER_MEMBER
     _COUNTS_MEMBER
 } jl_timing_counter_t;
diff --git a/src/toplevel.c b/src/toplevel.c
index af6f4c7832f29..60460abe6078d 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -157,25 +157,31 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         }
     }
     else {
-        jl_binding_t *b = jl_get_binding_wr(parent_module, name, 1);
-        jl_declare_constant(b, parent_module, name);
-        jl_value_t *old = NULL;
-        if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) {
-            if (!jl_is_module(old)) {
-                jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
+        jl_binding_t *b = jl_get_module_binding(parent_module, name, 1);
+        jl_binding_partition_t *bpart = jl_get_binding_partition(b, ct->world_age);
+        jl_ptr_kind_union_t pku = encode_restriction(NULL, BINDING_KIND_CONST);
+        jl_ptr_kind_union_t new_pku = encode_restriction((jl_value_t*)newm, BINDING_KIND_CONST);
+        if (!jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku)) {
+            if (decode_restriction_kind(pku) != BINDING_KIND_CONST) {
+                jl_declare_constant_val(b, parent_module, name, (jl_value_t*)newm);
+            } else {
+                // As a special exception allow binding replacement of modules
+                if (!jl_is_module(decode_restriction_value(pku))) {
+                    jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
+                }
+                if (jl_generating_output())
+                    jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name));
+                jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name));
+                pku = jl_atomic_exchange(&bpart->restriction, new_pku);
+            }
+            jl_gc_wb(bpart, newm);
+            if (decode_restriction_value(pku) != NULL && jl_is_module(decode_restriction_value(pku))) {
+                // create a hidden gc root for the old module
+                JL_LOCK(&jl_modules_mutex);
+                uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, decode_restriction_value(pku));
+                *refcnt += 1;
+                JL_UNLOCK(&jl_modules_mutex);
             }
-            if (jl_generating_output())
-                jl_errorf("cannot replace module %s during compilation", jl_symbol_name(name));
-            jl_printf(JL_STDERR, "WARNING: replacing module %s.\n", jl_symbol_name(name));
-            old = jl_atomic_exchange(&b->value, (jl_value_t*)newm);
-        }
-        jl_gc_wb(b, newm);
-        if (old != NULL) {
-            // create a hidden gc root for the old module
-            JL_LOCK(&jl_modules_mutex);
-            uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)old);
-            *refcnt += 1;
-            JL_UNLOCK(&jl_modules_mutex);
         }
     }
 
@@ -209,6 +215,10 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         form = NULL;
     }
 
+    newm->file = jl_symbol(filename);
+    jl_gc_wb_knownold(newm, newm->file);
+    newm->line = lineno;
+
     for (int i = 0; i < jl_array_nrows(exprs); i++) {
         // process toplevel form
         ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
@@ -218,27 +228,6 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
     }
     ct->world_age = last_age;
 
-#if 0
-    // some optional post-processing steps
-    size_t i;
-    jl_svec_t *table = jl_atomic_load_relaxed(&newm->bindings);
-    for (size_t i = 0; i < jl_svec_len(table); i++) {
-        jl_binding_t *b = (jl_binding_t*)jl_svecref(table, i);
-        if ((void*)b != jl_nothing) {
-            // remove non-exported macros
-            if (jl_symbol_name(b->name)[0]=='@' &&
-                !b->exportp && b->owner == b)
-                b->value = NULL;
-            // error for unassigned exports
-            /*
-            if (b->exportp && b->owner==b && b->value==NULL)
-                jl_errorf("identifier %s exported from %s is not initialized",
-                          jl_symbol_name(b->name), jl_symbol_name(newm->name));
-            */
-        }
-    }
-#endif
-
     JL_LOCK(&jl_modules_mutex);
     uintptr_t *refcnt = (uintptr_t*)ptrhash_bp(&jl_current_modules, (void*)newm);
     assert(*refcnt > (uintptr_t)HT_NOTFOUND);
@@ -310,18 +299,38 @@ static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f
     return args[0];
 }
 
-void jl_binding_set_type(jl_binding_t *b, jl_value_t *ty, int error)
+void jl_binding_set_type(jl_binding_t *b, jl_module_t *mod, jl_sym_t *sym, jl_value_t *ty)
 {
-    jl_value_t *old_ty = NULL;
-    if (jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, ty)) {
-        jl_gc_wb(b, ty);
-    }
-    else if (error && !jl_types_equal(ty, old_ty)) {
-        jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.",
-                  jl_symbol_name(jl_globalref_mod(b->globalref)->name), jl_symbol_name(jl_globalref_name(b->globalref)));
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    jl_ptr_kind_union_t new_pku = encode_restriction(ty, BINDING_KIND_GLOBAL);
+    while (1) {
+        if (decode_restriction_kind(pku) != BINDING_KIND_GLOBAL) {
+            if (jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+                if (jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku))
+                    break;
+                continue;
+            } else {
+                jl_errorf("cannot set type for imported global %s.%s.",
+                        jl_symbol_name(mod->name), jl_symbol_name(sym));
+            }
+        }
+        if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            jl_errorf("cannot set type for imported constant %s.%s.",
+                    jl_symbol_name(mod->name), jl_symbol_name(sym));
+        }
+        jl_value_t *old_ty = decode_restriction_value(pku);
+        if (!jl_types_equal(ty, old_ty)) {
+            jl_errorf("cannot set type for global %s.%s. It already has a value or is already set to a different type.",
+                    jl_symbol_name(mod->name), jl_symbol_name(sym));
+        }
+        if (jl_atomic_cmpswap(&bpart->restriction, &pku, new_pku))
+            break;
     }
+    jl_gc_wb(bpart, ty);
 }
 
+extern void check_safe_newbinding(jl_module_t *m, jl_sym_t *var);
 void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type) {
     // create uninitialized mutable binding for "global x" decl sometimes or probably
     jl_module_t *gm;
@@ -336,11 +345,16 @@ void jl_declare_global(jl_module_t *m, jl_value_t *arg, jl_value_t *set_type) {
         gm = m;
         gs = (jl_sym_t*)arg;
     }
-    if (!jl_binding_resolved_p(gm, gs) || set_type) {
-        jl_binding_t *b = jl_get_binding_wr(gm, gs, 1);
-        if (set_type) {
-            jl_binding_set_type(b, set_type, 1);
-        }
+    jl_binding_t *b = jl_get_module_binding(gm, gs, 1);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    while (decode_restriction_kind(pku) == BINDING_KIND_GUARD || decode_restriction_kind(pku) == BINDING_KIND_FAILED) {
+        check_safe_newbinding(gm, gs);
+        if (jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction(NULL, BINDING_KIND_DECLARED)))
+            break;
+    }
+    if (set_type) {
+        jl_binding_set_type(b, gm, gs, set_type);
     }
 }
 
@@ -415,9 +429,7 @@ static void expr_attributes(jl_value_t *v, jl_array_t *body, int *has_ccall, int
             jl_sym_t *name = jl_globalref_name(f);
             if (jl_binding_resolved_p(mod, name)) {
                 jl_binding_t *b = jl_get_binding(mod, name);
-                if (b && b->constp) {
-                    called = jl_atomic_load_relaxed(&b->value);
-                }
+                called = jl_get_binding_value_if_const(b);
             }
         }
         else if (jl_is_quotenode(f)) {
@@ -647,21 +659,16 @@ static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym
     assert(m);
     jl_sym_t *name = asname ? asname : import->name;
     // TODO: this is a bit race-y with what error message we might print
-    jl_binding_t *b = jl_get_module_binding(m, name, 0);
-    jl_binding_t *b2;
-    if (b != NULL && (b2 = jl_atomic_load_relaxed(&b->owner)) != NULL) {
-        if (b2->constp && jl_atomic_load_relaxed(&b2->value) == (jl_value_t*)import)
-            return;
-        if (b2 != b)
-            jl_errorf("importing %s into %s conflicts with an existing global",
-                      jl_symbol_name(name), jl_symbol_name(m->name));
-    }
-    else {
-        b = jl_get_binding_wr(m, name, 1);
+    jl_binding_t *b = jl_get_module_binding(m, name, 1);
+    if (jl_get_binding_value_if_const(b) == (jl_value_t*)import)
+        return;
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    if (decode_restriction_kind(pku) != BINDING_KIND_GUARD && decode_restriction_kind(pku) != BINDING_KIND_FAILED) {
+        jl_errorf("importing %s into %s conflicts with an existing global",
+                    jl_symbol_name(name), jl_symbol_name(m->name));
     }
-    jl_declare_constant(b, m, name);
-    jl_checked_assignment(b, m, name, (jl_value_t*)import);
-    b->imported = 1;
+    jl_declare_constant_val2(b, m, name, (jl_value_t*)import, BINDING_KIND_CONST_IMPORT);
 }
 
 // in `import A.B: x, y, ...`, evaluate the `A.B` part if it exists
@@ -677,7 +684,7 @@ static jl_module_t *eval_import_from(jl_module_t *m JL_PROPAGATES_ROOT, jl_expr_
                     jl_module_t *from = eval_import_path(m, NULL, path->args, &name, keyword);
                     if (name != NULL) {
                         from = (jl_module_t*)jl_eval_global_var(from, name);
-                        if (!jl_is_module(from))
+                        if (!from || !jl_is_module(from))
                             jl_errorf("invalid %s path: \"%s\" does not name a module", keyword, jl_symbol_name(name));
                     }
                     return from;
@@ -723,10 +730,49 @@ static void jl_eval_errorf(jl_module_t *m, const char *filename, int lineno, con
     JL_GC_POP();
 }
 
-JL_DLLEXPORT void jl_declare_constant_val(jl_binding_t *b, jl_module_t *gm, jl_sym_t *gs, jl_value_t *val)
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val2(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val, enum jl_partition_kind constant_kind)
 {
-    jl_declare_constant(b, gm, gs);
-    jl_checked_assignment(b, gm, gs, val);
+    jl_binding_partition_t *bpart = jl_get_binding_partition(b, jl_current_task->world_age);
+    jl_ptr_kind_union_t pku = jl_atomic_load_relaxed(&bpart->restriction);
+    int did_warn = 0;
+    while (1) {
+        if (jl_bkind_is_some_constant(decode_restriction_kind(pku))) {
+            if (!val)
+                return bpart;
+            jl_value_t *old = decode_restriction_value(pku);
+            if (jl_egal(val, old))
+                break;
+            if (!did_warn) {
+                if (jl_typeof(val) != jl_typeof(old) || jl_is_type(val) || jl_is_module(val))
+                    jl_errorf("invalid redefinition of constant %s.%s",
+                        jl_symbol_name(mod->name),
+                        jl_symbol_name(var));
+                else
+                    jl_safe_printf("WARNING: redefinition of constant %s.%s. This may fail, cause incorrect answers, or produce other errors.\n",
+                        jl_symbol_name(mod->name),
+                        jl_symbol_name(var));
+                did_warn = 1;
+            }
+        } else if (!jl_bkind_is_some_guard(decode_restriction_kind(pku))) {
+            if (jl_bkind_is_some_import(decode_restriction_kind(pku))) {
+                jl_errorf("cannot declare %s.%s constant; it was already declared as an import",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+            } else {
+                jl_errorf("cannot declare %s.%s constant; it was already declared global",
+                        jl_symbol_name(mod->name), jl_symbol_name(var));
+            }
+        }
+        if (jl_atomic_cmpswap(&bpart->restriction, &pku, encode_restriction(val, constant_kind))) {
+            jl_gc_wb(bpart, val);
+            break;
+        }
+    }
+    return bpart;
+}
+
+JL_DLLEXPORT jl_binding_partition_t *jl_declare_constant_val(jl_binding_t *b, jl_module_t *mod, jl_sym_t *var, jl_value_t *val)
+{
+    return jl_declare_constant_val2(b, mod, var, val, BINDING_KIND_CONST);
 }
 
 JL_DLLEXPORT void jl_eval_const_decl(jl_module_t *m, jl_value_t *arg, jl_value_t *val)
@@ -742,12 +788,8 @@ JL_DLLEXPORT void jl_eval_const_decl(jl_module_t *m, jl_value_t *arg, jl_value_t
         gm = m;
         gs = (jl_sym_t*)arg;
     }
-    jl_binding_t *b = jl_get_binding_wr(gm, gs, 1);
-    if (val) {
-        jl_declare_constant_val(b, gm, gs, val);
-    } else {
-        jl_declare_constant(b, gm, gs);
-    }
+    jl_binding_t *b = jl_get_module_binding(gm, gs, 1);
+    jl_declare_constant_val(b, gm, gs, val);
 }
 
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded, const char **toplevel_filename, int *toplevel_lineno)
diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl
index bd44369655ae4..9bca72f6c7a14 100644
--- a/stdlib/Artifacts/src/Artifacts.jl
+++ b/stdlib/Artifacts/src/Artifacts.jl
@@ -175,13 +175,11 @@ function load_overrides(;force::Bool = false)::Dict{Symbol, Any}
         end
     end
 
-    overrides = Dict{Symbol,Any}(
-        # Overrides by UUID
-        :UUID => overrides_uuid,
-
-        # Overrides by hash
-        :hash => overrides_hash
-    )
+    overrides = Dict{Symbol,Any}()
+    # Overrides by UUID
+    overrides[:UUID] = overrides_uuid
+    # Overrides by hash
+    overrides[:hash] = overrides_hash
 
     ARTIFACT_OVERRIDES[] = overrides
     return overrides
@@ -351,7 +349,7 @@ function process_overrides(artifact_dict::Dict, pkg_uuid::Base.UUID)
 
             # If we've got a platform-specific friend, override all hashes:
             artifact_dict_name = artifact_dict[name]
-            if isa(artifact_dict_name, Array)
+            if isa(artifact_dict_name, Vector{Any})
                 for entry in artifact_dict_name
                     entry = entry::Dict{String,Any}
                     hash = SHA1(entry["git-tree-sha1"]::String)
@@ -544,11 +542,11 @@ function jointail(dir, tail)
     end
 end
 
-function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, @nospecialize(lazyartifacts))
-    moduleroot = Base.moduleroot(__module__)
-    if haskey(Base.module_keys, moduleroot)
+function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, ::Val{LazyArtifacts}) where LazyArtifacts
+    pkg = Base.PkgId(__module__)
+    if pkg.uuid !== nothing
         # Process overrides for this UUID, if we know what it is
-        process_overrides(artifact_dict, Base.module_keys[moduleroot].uuid)
+        process_overrides(artifact_dict, pkg.uuid)
     end
 
     # If the artifact exists, we're in the happy path and we can immediately
@@ -563,11 +561,11 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic
     # If not, try determining what went wrong:
     meta = artifact_meta(name, artifact_dict, artifacts_toml; platform)
     if meta !== nothing && get(meta, "lazy", false)
-        if lazyartifacts isa Module && isdefined(lazyartifacts, :ensure_artifact_installed)
-            if nameof(lazyartifacts) in (:Pkg, :Artifacts)
+        if LazyArtifacts isa Module && isdefined(LazyArtifacts, :ensure_artifact_installed)
+            if nameof(LazyArtifacts) in (:Pkg, :Artifacts)
                 Base.depwarn("using Pkg instead of using LazyArtifacts is deprecated", :var"@artifact_str", force=true)
             end
-            return jointail(lazyartifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail)
+            return jointail(LazyArtifacts.ensure_artifact_installed(string(name), meta, artifacts_toml; platform), path_tail)
         end
         error("Artifact $(repr(name)) is a lazy artifact; package developers must call `using LazyArtifacts` in $(__module__) before using lazy artifacts.")
     end
@@ -699,10 +697,10 @@ macro artifact_str(name, platform=nothing)
 
     # Check if the user has provided `LazyArtifacts`, and thus supports lazy artifacts
     # If not, check to see if `Pkg` or `Pkg.Artifacts` has been imported.
-    lazyartifacts = nothing
+    LazyArtifacts = nothing
     for module_name in (:LazyArtifacts, :Pkg, :Artifacts)
         if isdefined(__module__, module_name)
-            lazyartifacts = GlobalRef(__module__, module_name)
+            LazyArtifacts = GlobalRef(__module__, module_name)
             break
         end
     end
@@ -714,7 +712,7 @@ macro artifact_str(name, platform=nothing)
         platform = HostPlatform()
         artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform)
         return quote
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), $(lazyartifacts))::String
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), Val($(LazyArtifacts)))::String
         end
     else
         if platform === nothing
@@ -723,7 +721,7 @@ macro artifact_str(name, platform=nothing)
         return quote
             local platform = $(esc(platform))
             local artifact_name, artifact_path_tail, hash = artifact_slash_lookup($(esc(name)), $(artifact_dict), $(artifacts_toml), platform)
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, $(lazyartifacts))::String
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, Val($(LazyArtifacts)))::String
         end
     end
 end
diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md
index 545dbd90775df..38b4f7ae86d29 100644
--- a/stdlib/Dates/docs/src/index.md
+++ b/stdlib/Dates/docs/src/index.md
@@ -684,9 +684,9 @@ value in the days field is uncertain.
 See the [API reference](@ref stdlib-dates-api) for additional information
 on methods exported from the `Dates` module.
 
-# [API reference](@id stdlib-dates-api)
+## [API reference](@id stdlib-dates-api)
 
-## Dates and Time Types
+### Dates and Time Types
 
 ```@docs
 Dates.Period
@@ -701,7 +701,7 @@ Dates.TimeZone
 Dates.UTC
 ```
 
-## Dates Functions
+### Dates Functions
 
 ```@docs
 Dates.DateTime(::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64, ::Int64)
@@ -730,7 +730,7 @@ Dates.now(::Type{Dates.UTC})
 Base.eps(::Union{Type{DateTime}, Type{Date}, Type{Time}, TimeType})
 ```
 
-### Accessor Functions
+#### Accessor Functions
 
 ```@docs
 Dates.year
@@ -758,7 +758,7 @@ Dates.monthday
 Dates.yearmonthday
 ```
 
-### Query Functions
+#### Query Functions
 
 ```@docs
 Dates.dayname
@@ -777,7 +777,7 @@ Dates.quarterofyear
 Dates.dayofquarter
 ```
 
-### Adjuster Functions
+#### Adjuster Functions
 
 ```@docs
 Base.trunc(::Dates.TimeType, ::Type{Dates.Period})
@@ -797,7 +797,7 @@ Dates.tonext(::Function, ::Dates.TimeType)
 Dates.toprev(::Function, ::Dates.TimeType)
 ```
 
-### Periods
+#### Periods
 
 ```@docs
 Dates.Period(::Any)
@@ -808,7 +808,7 @@ Dates.default
 Dates.periods
 ```
 
-### Rounding Functions
+#### Rounding Functions
 
 `Date` and `DateTime` values can be rounded to a specified resolution (e.g., 1 month or 15 minutes)
 with `floor`, `ceil`, or `round`.
@@ -837,7 +837,7 @@ Dates.date2epochdays
 Dates.datetime2epochms
 ```
 
-### Conversion Functions
+#### Conversion Functions
 
 ```@docs
 Dates.today
diff --git a/stdlib/Dates/src/types.jl b/stdlib/Dates/src/types.jl
index e1f7f900bff51..1978864b92554 100644
--- a/stdlib/Dates/src/types.jl
+++ b/stdlib/Dates/src/types.jl
@@ -203,7 +203,7 @@ function totaldays(y, m, d)
 end
 
 # If the year is divisible by 4, except for every 100 years, except for every 400 years
-isleapyear(y) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))
+isleapyear(y::Integer) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))
 
 # Number of days in month
 const DAYSINMONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
diff --git a/stdlib/Dates/test/types.jl b/stdlib/Dates/test/types.jl
index 35a793867dc5a..29395ccf3a271 100644
--- a/stdlib/Dates/test/types.jl
+++ b/stdlib/Dates/test/types.jl
@@ -41,6 +41,7 @@ end
     @test Dates.isleapyear(-1) == false
     @test Dates.isleapyear(4) == true
     @test Dates.isleapyear(-4) == true
+    @test_throws MethodError Dates.isleapyear(Dates.Year(1992))
 end
 # Create "test" check manually
 y = Dates.Year(1)
@@ -262,7 +263,11 @@ end
 end
 
 @testset "issue #31524" begin
-    dt1 = Libc.strptime("%Y-%M-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z")
+    # Ensure the result doesn't depend on local timezone, especially on macOS
+    # where an extra internal call to `mktime` is affected by timezone settings.
+    dt1 = withenv("TZ" => "UTC") do
+        Libc.strptime("%Y-%m-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z")
+    end
     dt2 = Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)
 
     time = Time(dt1)
diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version
index 7805348a4b2f5..b539771fbdb47 100644
--- a/stdlib/Downloads.version
+++ b/stdlib/Downloads.version
@@ -1,4 +1,4 @@
 DOWNLOADS_BRANCH = master
-DOWNLOADS_SHA1 = a9d274ff6588cc5dbfa90e908ee34c2408bab84a
+DOWNLOADS_SHA1 = 89d3c7dded535a77551e763a437a6d31e4d9bf84
 DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git
 DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md
index 1b2212fcc5a28..15d4e39a45117 100644
--- a/stdlib/FileWatching/docs/src/index.md
+++ b/stdlib/FileWatching/docs/src/index.md
@@ -5,11 +5,17 @@ EditURL = "https://github.com/JuliaLang/julia/blob/master/stdlib/FileWatching/do
 # [File Events](@id lib-filewatching)
 
 ```@docs
-FileWatching.poll_fd
-FileWatching.poll_file
-FileWatching.watch_file
-FileWatching.watch_folder
-FileWatching.unwatch_folder
+poll_fd
+poll_file
+watch_file
+watch_folder
+unwatch_folder
+```
+```@docs
+FileMonitor
+FolderMonitor
+PollingFileWatcher
+FDWatcher
 ```
 
 # Pidfile
diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl
index 0c987ad01c828..7c743ce634193 100644
--- a/stdlib/FileWatching/src/FileWatching.jl
+++ b/stdlib/FileWatching/src/FileWatching.jl
@@ -6,7 +6,7 @@ Utilities for monitoring files and file descriptors for events.
 module FileWatching
 
 export
-    # one-shot API (returns results):
+    # one-shot API (returns results, race-y):
     watch_file, # efficient for small numbers of files
     watch_folder, # efficient for large numbers of files
     unwatch_folder,
@@ -22,11 +22,11 @@ export
     trymkpidlock
 
 import Base: @handle_as, wait, close, eventloop, notify_error, IOError,
-    _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
-    iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct,
-    preserve_handle, unpreserve_handle, isreadable, iswritable, isopen,
-    |, getproperty, propertynames
-import Base.Filesystem.StatStruct
+    uv_req_data, uv_req_set_data, associate_julia_struct, disassociate_julia_struct,
+    _sizeof_uv_poll, _sizeof_uv_fs, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
+    iolock_begin, iolock_end, preserve_handle, unpreserve_handle,
+    isreadable, iswritable, isopen, |, getproperty, propertynames
+import Base.Filesystem: StatStruct, uv_fs_req_cleanup
 if Sys.iswindows()
     import Base.WindowsRawSocket
 end
@@ -38,13 +38,13 @@ const UV_CHANGE = Int32(2)
 struct FileEvent
     renamed::Bool
     changed::Bool
-    timedout::Bool
+    timedout::Bool # aka canceled
     FileEvent(r::Bool, c::Bool, t::Bool) = new(r, c, t)
 end
 FileEvent() = FileEvent(false, false, true)
 FileEvent(flags::Integer) = FileEvent((flags & UV_RENAME) != 0,
                                       (flags & UV_CHANGE) != 0,
-                                      false)
+                                      iszero(flags))
 |(a::FileEvent, b::FileEvent) =
     FileEvent(a.renamed | b.renamed,
               a.changed | b.changed,
@@ -78,34 +78,183 @@ isreadable(f::FDEvent) = f.readable
 iswritable(f::FDEvent) = f.writable
 |(a::FDEvent, b::FDEvent) = FDEvent(getfield(a, :events) | getfield(b, :events))
 
+# Callback functions
+
+function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
+    t = @handle_as handle FileMonitor
+    lock(t.notify)
+    try
+        if status != 0
+            t.ioerrno = status
+            notify_error(t.notify, _UVError("FileMonitor", status))
+            uvfinalize(t)
+        elseif events != t.events
+            events = t.events |= events
+            notify(t.notify, all=false)
+        end
+    finally
+        unlock(t.notify)
+    end
+    nothing
+end
+
+function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
+    t = @handle_as handle FolderMonitor
+    lock(t.notify)
+    try
+        if status != 0
+            notify_error(t.notify, _UVError("FolderMonitor", status))
+        else
+            fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
+            push!(t.channel, fname => FileEvent(events))
+            notify(t.notify)
+        end
+    finally
+        unlock(t.notify)
+    end
+    nothing
+end
+
+function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
+    t = @handle_as handle _FDWatcher
+    lock(t.notify)
+    try
+        if status != 0
+            notify_error(t.notify, _UVError("FDWatcher", status))
+        else
+            t.events |= events
+            if t.active[1] || t.active[2]
+                if isempty(t.notify)
+                    # if we keep hearing about events when nobody appears to be listening,
+                    # stop the poll to save cycles
+                    t.active = (false, false)
+                    ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
+                end
+            end
+            notify(t.notify, events)
+        end
+    finally
+        unlock(t.notify)
+    end
+    nothing
+end
+
+function uv_fspollcb(req::Ptr{Cvoid})
+    pfw = unsafe_pointer_to_objref(uv_req_data(req))::PollingFileWatcher
+    pfw.active = false
+    unpreserve_handle(pfw)
+    @assert pointer(pfw.stat_req) == req
+    r = Int32(ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req))
+    statbuf = ccall(:uv_fs_get_statbuf, Ptr{UInt8}, (Ptr{Cvoid},), req)
+    curr_stat = StatStruct(pfw.file, statbuf, r)
+    uv_fs_req_cleanup(req)
+    lock(pfw.notify)
+    try
+        if !isempty(pfw.notify) # must discard the update if nobody watching
+            if pfw.ioerrno != r || (r == 0 && pfw.prev_stat != curr_stat)
+                if r == 0
+                    pfw.prev_stat = curr_stat
+                end
+                pfw.ioerrno = r
+                notify(pfw.notify, true)
+            end
+            pfw.timer = Timer(pfw.interval) do t
+                # async task
+                iolock_begin()
+                lock(pfw.notify)
+                try
+                    if pfw.timer === t # use identity check to test if this callback is stale by the time we got the lock
+                        pfw.timer = nothing
+                        @assert !pfw.active
+                        if isopen(pfw) && !isempty(pfw.notify)
+                            preserve_handle(pfw)
+                            uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},))
+                            err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+                                eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid})
+                            err == 0 || notify(pfw.notify, _UVError("PollingFileWatcher (start)", err), error=true) # likely just ENOMEM
+                            pfw.active = true
+                        end
+                    end
+                finally
+                    unlock(pfw.notify)
+                end
+                iolock_end()
+                nothing
+            end
+        end
+    finally
+        unlock(pfw.notify)
+    end
+    nothing
+end
+
+# Types
+
+"""
+    FileMonitor(path::AbstractString)
+
+Watch file or directory `path` (which must exist) for changes until a change occurs. This
+function does not poll the file system and instead uses platform-specific functionality to
+receive notifications from the operating system (e.g. via inotify on Linux). See the NodeJS
+documentation linked below for details.
+
+`fm = FileMonitor(path)` acts like an auto-reset Event, so `wait(fm)` blocks until there has
+been at least one event in the file originally at the given path and then returns an object
+with boolean fields `renamed`, `changed`, `timedout` summarizing all changes that have
+occurred since the last call to `wait` returned.
+
+This behavior of this function varies slightly across platforms. See
+<https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+"""
 mutable struct FileMonitor
     @atomic handle::Ptr{Cvoid}
-    file::String
-    notify::Base.ThreadSynchronizer
-    events::Int32
-    active::Bool
+    const file::String
+    const notify::Base.ThreadSynchronizer
+    events::Int32 # accumulator for events that occurred since the last wait call, similar to Event with autoreset
+    ioerrno::Int32 # record the error, if any occurs (unlikely)
     FileMonitor(file::AbstractString) = FileMonitor(String(file))
     function FileMonitor(file::String)
         handle = Libc.malloc(_sizeof_uv_fs_event)
-        this = new(handle, file, Base.ThreadSynchronizer(), 0, false)
+        this = new(handle, file, Base.ThreadSynchronizer(), 0, 0)
         associate_julia_struct(handle, this)
         iolock_begin()
         err = ccall(:uv_fs_event_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
         if err != 0
             Libc.free(handle)
-            throw(_UVError("FileMonitor", err))
+            uv_error("FileMonitor", err)
         end
-        iolock_end()
         finalizer(uvfinalize, this)
+        uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
+        uv_error("FileMonitor (start)",
+                 ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
+                       this.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, file, 0))
+        iolock_end()
         return this
     end
 end
 
+
+"""
+    FolderMonitor(folder::AbstractString)
+
+Watch a file or directory `path` for changes until a change has occurred. This function does
+not poll the file system and instead uses platform-specific functionality to receive
+notifications from the operating system (e.g. via inotify on Linux). See the NodeJS
+documentation linked below for details.
+
+This acts similar to a Channel, so calling `take!` (or `wait`) blocks until some change has
+occurred. The `wait` function will return a pair where the first field is the name of the
+changed file (if available) and the second field is an object with boolean fields `renamed`
+and `changed`, giving the event that occurred on it.
+
+This behavior of this function varies slightly across platforms. See
+<https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+"""
 mutable struct FolderMonitor
     @atomic handle::Ptr{Cvoid}
     # notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError}
-    notify::Base.ThreadSynchronizer
-    channel::Vector{Any} # eltype = Pair{String, FileEvent}
+    const notify::Base.ThreadSynchronizer
+    const channel::Vector{Any} # eltype = Pair{String, FileEvent}
     FolderMonitor(folder::AbstractString) = FolderMonitor(String(folder))
     function FolderMonitor(folder::String)
         handle = Libc.malloc(_sizeof_uv_fs_event)
@@ -118,6 +267,7 @@ mutable struct FolderMonitor
             throw(_UVError("FolderMonitor", err))
         end
         finalizer(uvfinalize, this)
+        uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
         uv_error("FolderMonitor (start)",
                  ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
                        handle, uv_jl_fseventscb_folder::Ptr{Cvoid}, folder, 0))
@@ -126,36 +276,55 @@ mutable struct FolderMonitor
     end
 end
 
+# this is similar to uv_fs_poll, but strives to avoid the design mistakes that make it unsuitable for any usable purpose
+# https://github.com/libuv/libuv/issues/4543
+"""
+    PollingFileWatcher(path::AbstractString, interval_s::Real=5.007)
+
+Monitor a file for changes by polling `stat` every `interval_s` seconds until a change
+occurs or `timeout_s` seconds have elapsed. The `interval_s` should be a long period; the
+default is 5.007 seconds. Call `stat` on it to get the most recent, but old, result.
+
+This acts like an auto-reset Event, so calling `wait` blocks until the `stat` result has
+changed since the previous value captured upon entry to the `wait` call. The `wait` function
+will return a pair of status objects `(previous, current)` once any `stat` change is
+detected since the previous time that `wait` was called. The `previous` status is always a
+`StatStruct`, but it may have all of the fields zeroed (indicating the file didn't
+previously exist, or wasn't previously accessible).
+
+The `current` status object may be a `StatStruct`, an `EOFError` (if the wait is canceled by
+closing this object), or some other `Exception` subtype (if the `stat` operation failed: for
+example, if the path is removed). Note that `stat` value may be outdated if the file has
+changed again multiple times.
+
+Using [`FileMonitor`](@ref) for this operation is preferred, since it is more reliable and
+efficient, although in some situations it may not be available.
+"""
 mutable struct PollingFileWatcher
-    @atomic handle::Ptr{Cvoid}
     file::String
-    interval::UInt32
-    notify::Base.ThreadSynchronizer
-    active::Bool
-    curr_error::Int32
-    curr_stat::StatStruct
+    interval::Float64
+    const notify::Base.ThreadSynchronizer # lock protects all fields which can be changed (including interval and file, if you really must)
+    timer::Union{Nothing,Timer}
+    const stat_req::Memory{UInt8}
+    active::Bool # whether there is already an uv_fspollcb in-flight, so to speak
+    closed::Bool # whether the user has explicitly destroyed this
+    ioerrno::Int32 # the stat errno as of the last result
+    prev_stat::StatStruct # the stat as of the last successful result
     PollingFileWatcher(file::AbstractString, interval::Float64=5.007) = PollingFileWatcher(String(file), interval)
     function PollingFileWatcher(file::String, interval::Float64=5.007) # same default as nodejs
-        handle = Libc.malloc(_sizeof_uv_fs_poll)
-        this = new(handle, file, round(UInt32, interval * 1000), Base.ThreadSynchronizer(), false, 0, StatStruct())
-        associate_julia_struct(handle, this)
-        iolock_begin()
-        err = ccall(:uv_fs_poll_init, Int32, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
-        if err != 0
-            Libc.free(handle)
-            throw(_UVError("PollingFileWatcher", err))
-        end
-        finalizer(uvfinalize, this)
-        iolock_end()
+        stat_req = Memory{UInt8}(undef, Int(_sizeof_uv_fs))
+        this = new(file, interval, Base.ThreadSynchronizer(), nothing, stat_req, false, false, 0, StatStruct())
+        uv_req_set_data(stat_req, this)
+        wait(this) # initialize with the current stat before return
         return this
     end
 end
 
 mutable struct _FDWatcher
     @atomic handle::Ptr{Cvoid}
-    fdnum::Int # this is NOT the file descriptor
+    const fdnum::Int # this is NOT the file descriptor
     refcount::Tuple{Int, Int}
-    notify::Base.ThreadSynchronizer
+    const notify::Base.ThreadSynchronizer
     events::Int32
     active::Tuple{Bool, Bool}
 
@@ -274,9 +443,28 @@ mutable struct _FDWatcher
     end
 end
 
+"""
+    FDWatcher(fd::Union{RawFD,WindowsRawSocket}, readable::Bool, writable::Bool)
+
+Monitor a file descriptor `fd` for changes in the read or write availability.
+
+The keyword arguments determine which of read and/or write status should be monitored; at
+least one of them must be set to `true`.
+
+The returned value is an object with boolean fields `readable`, `writable`, and `timedout`,
+giving the result of the polling.
+
+This acts like a level-set event, so calling `wait` blocks until one of those conditions is
+met, but then continues to return without blocking until the condition is cleared (either
+there is no more to read, or no more space in the write buffer, or both).
+
+!!! warning
+    You must call `close` manually, when finished with this object, before the fd
+    argument is closed. Failure to do so risks serious crashes.
+"""
 mutable struct FDWatcher
     # WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd`
-    watcher::_FDWatcher
+    const watcher::_FDWatcher
     mask::FDEvent
     function FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
         return FDWatcher(fd, FDEvent(readable, writable, false, false))
@@ -327,7 +515,7 @@ function close(t::FDWatcher)
     close(t.watcher, mask)
 end
 
-function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
+function uvfinalize(uv::Union{FileMonitor, FolderMonitor})
     iolock_begin()
     if uv.handle != C_NULL
         disassociate_julia_struct(uv) # close (and free) without notify
@@ -336,7 +524,7 @@ function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
     iolock_end()
 end
 
-function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
+function close(t::Union{FileMonitor, FolderMonitor})
     iolock_begin()
     if t.handle != C_NULL
         ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
@@ -344,6 +532,21 @@ function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
     iolock_end()
 end
 
+function close(pfw::PollingFileWatcher)
+    timer = nothing
+    lock(pfw.notify)
+    try
+        pfw.closed = true
+        notify(pfw.notify, false)
+        timer = pfw.timer
+        pfw.timer = nothing
+    finally
+        unlock(pfw.notify)
+    end
+    timer === nothing || close(timer)
+    nothing
+end
+
 function _uv_hook_close(uv::_FDWatcher)
     # fyi: jl_atexit_hook can cause this to get called too
     Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
@@ -351,24 +554,11 @@ function _uv_hook_close(uv::_FDWatcher)
     nothing
 end
 
-function _uv_hook_close(uv::PollingFileWatcher)
-    lock(uv.notify)
-    try
-        uv.active = false
-        Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
-        notify(uv.notify, StatStruct())
-    finally
-        unlock(uv.notify)
-    end
-    nothing
-end
-
 function _uv_hook_close(uv::FileMonitor)
     lock(uv.notify)
     try
-        uv.active = false
         Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
-        notify(uv.notify, FileEvent())
+        notify(uv.notify)
     finally
         unlock(uv.notify)
     end
@@ -388,179 +578,11 @@ end
 
 isopen(fm::FileMonitor) = fm.handle != C_NULL
 isopen(fm::FolderMonitor) = fm.handle != C_NULL
-isopen(pfw::PollingFileWatcher) = pfw.handle != C_NULL
+isopen(pfw::PollingFileWatcher) = !pfw.closed
 isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0)
 isopen(pfw::FDWatcher) = !pfw.mask.timedout
 
-function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
-    t = @handle_as handle FileMonitor
-    lock(t.notify)
-    try
-        if status != 0
-            notify_error(t.notify, _UVError("FileMonitor", status))
-        else
-            t.events |= events
-            notify(t.notify, FileEvent(events))
-        end
-    finally
-        unlock(t.notify)
-    end
-    nothing
-end
-
-function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
-    t = @handle_as handle FolderMonitor
-    lock(t.notify)
-    try
-        if status != 0
-            notify_error(t.notify, _UVError("FolderMonitor", status))
-        else
-            fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
-            push!(t.channel, fname => FileEvent(events))
-            notify(t.notify)
-        end
-    finally
-        unlock(t.notify)
-    end
-    nothing
-end
-
-function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
-    t = @handle_as handle _FDWatcher
-    lock(t.notify)
-    try
-        if status != 0
-            notify_error(t.notify, _UVError("FDWatcher", status))
-        else
-            t.events |= events
-            if t.active[1] || t.active[2]
-                if isempty(t.notify)
-                    # if we keep hearing about events when nobody appears to be listening,
-                    # stop the poll to save cycles
-                    t.active = (false, false)
-                    ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
-                end
-            end
-            notify(t.notify, events)
-        end
-    finally
-        unlock(t.notify)
-    end
-    nothing
-end
-
-function uv_fspollcb(handle::Ptr{Cvoid}, status::Int32, prev::Ptr, curr::Ptr)
-    t = @handle_as handle PollingFileWatcher
-    old_status = t.curr_error
-    t.curr_error = status
-    if status == 0
-        t.curr_stat = StatStruct(convert(Ptr{UInt8}, curr))
-    end
-    if status == 0 || status != old_status
-        prev_stat = StatStruct(convert(Ptr{UInt8}, prev))
-        lock(t.notify)
-        try
-            notify(t.notify, prev_stat)
-        finally
-            unlock(t.notify)
-        end
-    end
-    nothing
-end
-
-global uv_jl_pollcb::Ptr{Cvoid}
-global uv_jl_fspollcb::Ptr{Cvoid}
-global uv_jl_fseventscb_file::Ptr{Cvoid}
-global uv_jl_fseventscb_folder::Ptr{Cvoid}
-
-function __init__()
-    global uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint))
-    global uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Ptr{Cvoid}))
-    global uv_jl_fseventscb_file = @cfunction(uv_fseventscb_file, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
-    global uv_jl_fseventscb_folder = @cfunction(uv_fseventscb_folder, Cvoid, (Ptr{Cvoid}, Ptr{Int8}, Int32, Int32))
-
-    Base.mkpidlock_hook = mkpidlock
-    Base.trymkpidlock_hook = trymkpidlock
-    Base.parse_pidfile_hook = Pidfile.parse_pidfile
-
-    nothing
-end
-
-function start_watching(t::_FDWatcher)
-    iolock_begin()
-    t.handle == C_NULL && throw(ArgumentError("FDWatcher is closed"))
-    readable = t.refcount[1] > 0
-    writable = t.refcount[2] > 0
-    if t.active[1] != readable || t.active[2] != writable
-        # make sure the READABLE / WRITEABLE state is updated
-        uv_error("FDWatcher (start)",
-                 ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}),
-                       t.handle,
-                       (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0),
-                       uv_jl_pollcb::Ptr{Cvoid}))
-        t.active = (readable, writable)
-    end
-    iolock_end()
-    nothing
-end
-
-function start_watching(t::PollingFileWatcher)
-    iolock_begin()
-    t.handle == C_NULL && throw(ArgumentError("PollingFileWatcher is closed"))
-    if !t.active
-        uv_error("PollingFileWatcher (start)",
-                 ccall(:uv_fs_poll_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, UInt32),
-                       t.handle, uv_jl_fspollcb::Ptr{Cvoid}, t.file, t.interval))
-        t.active = true
-    end
-    iolock_end()
-    nothing
-end
-
-function stop_watching(t::PollingFileWatcher)
-    iolock_begin()
-    lock(t.notify)
-    try
-        if t.active && isempty(t.notify)
-            t.active = false
-            uv_error("PollingFileWatcher (stop)",
-                     ccall(:uv_fs_poll_stop, Int32, (Ptr{Cvoid},), t.handle))
-        end
-    finally
-        unlock(t.notify)
-    end
-    iolock_end()
-    nothing
-end
-
-function start_watching(t::FileMonitor)
-    iolock_begin()
-    t.handle == C_NULL && throw(ArgumentError("FileMonitor is closed"))
-    if !t.active
-        uv_error("FileMonitor (start)",
-                 ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
-                       t.handle, uv_jl_fseventscb_file::Ptr{Cvoid}, t.file, 0))
-        t.active = true
-    end
-    iolock_end()
-    nothing
-end
-
-function stop_watching(t::FileMonitor)
-    iolock_begin()
-    lock(t.notify)
-    try
-        if t.active && isempty(t.notify)
-            t.active = false
-            uv_error("FileMonitor (stop)",
-                     ccall(:uv_fs_event_stop, Int32, (Ptr{Cvoid},), t.handle))
-        end
-    finally
-        unlock(t.notify)
-    end
-    iolock_end()
-    nothing
-end
+Base.stat(pfw::PollingFileWatcher) = Base.checkstat(@lock pfw.notify pfw.prev_stat)
 
 # n.b. this _wait may return spuriously early with a timedout event
 function _wait(fdw::_FDWatcher, mask::FDEvent)
@@ -572,7 +594,20 @@ function _wait(fdw::_FDWatcher, mask::FDEvent)
         if !isopen(fdw) # !open
             throw(EOFError())
         elseif events.timedout
-            start_watching(fdw) # make sure the poll is active
+            fdw.handle == C_NULL && throw(ArgumentError("FDWatcher is closed"))
+            # start_watching to make sure the poll is active
+            readable = fdw.refcount[1] > 0
+            writable = fdw.refcount[2] > 0
+            if fdw.active[1] != readable || fdw.active[2] != writable
+                # make sure the READABLE / WRITEABLE state is updated
+                uv_jl_pollcb = @cfunction(uv_pollcb, Cvoid, (Ptr{Cvoid}, Cint, Cint))
+                uv_error("FDWatcher (start)",
+                         ccall(:uv_poll_start, Int32, (Ptr{Cvoid}, Int32, Ptr{Cvoid}),
+                               fdw.handle,
+                               (readable ? UV_READABLE : 0) | (writable ? UV_WRITABLE : 0),
+                               uv_jl_pollcb::Ptr{Cvoid}))
+                fdw.active = (readable, writable)
+            end
             iolock_end()
             return FDEvent(wait(fdw.notify)::Int32)
         else
@@ -640,52 +675,88 @@ end
 
 function wait(pfw::PollingFileWatcher)
     iolock_begin()
-    preserve_handle(pfw)
     lock(pfw.notify)
-    local prevstat
+    prevstat = pfw.prev_stat
+    havechange = false
+    timer = nothing
     try
-        start_watching(pfw)
+        # we aren't too strict about the first interval after `wait`, but rather always
+        # check right away to see if it had immediately changed again, and then repeatedly
+        # after interval again until success
+        pfw.closed && throw(ArgumentError("PollingFileWatcher is closed"))
+        timer = pfw.timer
+        pfw.timer = nothing # disable Timer callback
+        # start_watching
+        if !pfw.active
+            preserve_handle(pfw)
+            uv_jl_fspollcb = @cfunction(uv_fspollcb, Cvoid, (Ptr{Cvoid},))
+            err = ccall(:uv_fs_stat, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+                eventloop(), pfw.stat_req, pfw.file, uv_jl_fspollcb::Ptr{Cvoid})
+            err == 0 || uv_error("PollingFileWatcher (start)", err) # likely just ENOMEM
+            pfw.active = true
+        end
         iolock_end()
-        prevstat = wait(pfw.notify)::StatStruct
+        havechange = wait(pfw.notify)::Bool
         unlock(pfw.notify)
         iolock_begin()
-        lock(pfw.notify)
-    finally
-        unlock(pfw.notify)
-        unpreserve_handle(pfw)
+    catch
+        # stop_watching: cleanup any timers from before or after starting this wait before it failed, if there are no other watchers
+        latetimer = nothing
+        try
+            if isempty(pfw.notify)
+                latetimer = pfw.timer
+                pfw.timer = nothing
+            end
+        finally
+            unlock(pfw.notify)
+        end
+        if timer !== nothing || latetimer !== nothing
+            iolock_end()
+            timer === nothing || close(timer)
+            latetimer === nothing || close(latetimer)
+            iolock_begin()
+        end
+        rethrow()
     end
-    stop_watching(pfw)
     iolock_end()
-    if pfw.handle == C_NULL
+    timer === nothing || close(timer) # cleanup resources so we don't hang on exit
+    if !havechange # user canceled by calling close
         return prevstat, EOFError()
-    elseif pfw.curr_error != 0
-        return prevstat, _UVError("PollingFileWatcher", pfw.curr_error)
+    end
+    # grab the most up-to-date stat result as of this time, even if it was a bit newer than
+    # the notify call (unlikely, as there would need to be a concurrent call to wait)
+    lock(pfw.notify)
+    currstat = pfw.prev_stat
+    ioerrno = pfw.ioerrno
+    unlock(pfw.notify)
+    if ioerrno == 0
+        @assert currstat.ioerrno == 0
+        return prevstat, currstat
+    elseif ioerrno in (Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL)
+        return prevstat, StatStruct(pfw.file, Ptr{UInt8}(0), ioerrno)
     else
-        return prevstat, pfw.curr_stat
+        return prevstat, _UVError("PollingFileWatcher", ioerrno)
     end
 end
 
 function wait(m::FileMonitor)
-    iolock_begin()
+    m.handle == C_NULL && throw(EOFError())
     preserve_handle(m)
     lock(m.notify)
-    local events
     try
-        start_watching(m)
-        iolock_end()
-        events = wait(m.notify)::FileEvent
-        events |= FileEvent(m.events)
-        m.events = 0
-        unlock(m.notify)
-        iolock_begin()
-        lock(m.notify)
+        while true
+            m.handle == C_NULL && throw(EOFError())
+            events = @atomicswap :not_atomic m.events = 0
+            events == 0 || return FileEvent(events)
+            if m.ioerrno != 0
+                uv_error("FileMonitor", m.ioerrno)
+            end
+            wait(m.notify)
+        end
     finally
         unlock(m.notify)
         unpreserve_handle(m)
     end
-    stop_watching(m)
-    iolock_end()
-    return events
 end
 
 function wait(m::FolderMonitor)
@@ -704,6 +775,7 @@ function wait(m::FolderMonitor)
         end
     return evt::Pair{String, FileEvent}
 end
+Base.take!(m::FolderMonitor) = wait(m) # Channel-like API
 
 
 """
@@ -717,6 +789,10 @@ least one of them must be set to `true`.
 
 The returned value is an object with boolean fields `readable`, `writable`, and `timedout`,
 giving the result of the polling.
+
+This is a thin wrapper over calling `wait` on a [`FDWatcher`](@ref), which implements the
+functionality but requires the user to call `close` manually when finished with it, or risk
+serious crashes.
 """
 function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, timeout_s::Real=-1; readable=false, writable=false)
     mask = FDEvent(readable, writable, false, false)
@@ -774,6 +850,15 @@ giving the result of watching the file.
 
 This behavior of this function varies slightly across platforms. See
 <https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+
+This is a thin wrapper over calling `wait` on a [`FileMonitor`](@ref). This function has a
+small race window between consecutive calls to `watch_file` where the file might change
+without being detected. To avoid this race, use
+
+    fm = FileMonitor(path)
+    wait(fm)
+
+directly, re-using the same `fm` each time you `wait`.
 """
 function watch_file(s::String, timeout_s::Float64=-1.0)
     fm = FileMonitor(s)
@@ -784,7 +869,12 @@ function watch_file(s::String, timeout_s::Float64=-1.0)
                 close(fm)
             end
         end
-        return wait(fm)
+        try
+            return wait(fm)
+        catch ex
+            ex isa EOFError && return FileEvent()
+            rethrow()
+        end
     finally
         close(fm)
         @isdefined(timer) && close(timer)
@@ -795,7 +885,7 @@ watch_file(s::AbstractString, timeout_s::Real=-1) = watch_file(String(s), Float6
 """
     watch_folder(path::AbstractString, timeout_s::Real=-1)
 
-Watches a file or directory `path` for changes until a change has occurred or `timeout_s`
+Watch a file or directory `path` for changes until a change has occurred or `timeout_s`
 seconds have elapsed. This function does not poll the file system and instead uses platform-specific
 functionality to receive notifications from the operating system (e.g. via inotify on Linux).
 See the NodeJS documentation linked below for details.
@@ -809,10 +899,12 @@ giving the event.
 
 This behavior of this function varies slightly across platforms. See
 <https://nodejs.org/api/fs.html#fs_caveats> for more detailed information.
+
+This function is a thin wrapper over calling `wait` on a [`FolderMonitor`](@ref), with added timeout support.
 """
 watch_folder(s::AbstractString, timeout_s::Real=-1) = watch_folder(String(s), timeout_s)
 function watch_folder(s::String, timeout_s::Real=-1)
-    fm = get!(watched_folders, s) do
+    fm = @lock watched_folders get!(watched_folders[], s) do
         return FolderMonitor(s)
     end
     local timer
@@ -859,12 +951,12 @@ It is not recommended to do this while another task is waiting for
 """
 unwatch_folder(s::AbstractString) = unwatch_folder(String(s))
 function unwatch_folder(s::String)
-    fm = pop!(watched_folders, s, nothing)
+    fm = @lock watched_folders pop!(watched_folders[], s, nothing)
     fm === nothing || close(fm)
     nothing
 end
 
-const watched_folders = Dict{String, FolderMonitor}()
+const watched_folders = Lockable(Dict{String, FolderMonitor}())
 
 """
     poll_file(path::AbstractString, interval_s::Real=5.007, timeout_s::Real=-1) -> (previous::StatStruct, current)
@@ -878,11 +970,15 @@ The `previous` status is always a `StatStruct`, but it may have all of the field
 (indicating the file didn't previously exist, or wasn't previously accessible).
 
 The `current` status object may be a `StatStruct`, an `EOFError` (indicating the timeout elapsed),
-or some other `Exception` subtype (if the `stat` operation failed - for example, if the path does not exist).
+or some other `Exception` subtype (if the `stat` operation failed: for example, if the path does not exist).
+
+To determine when a file was modified, compare `!(current isa StatStruct && prev == current)` to detect
+notification of changes to the mtime or inode. However, using [`watch_file`](@ref) for this operation
+is preferred, since it is more reliable and efficient, although in some situations it may not be available.
 
-To determine when a file was modified, compare `current isa StatStruct && mtime(prev) != mtime(current)` to detect
-notification of changes. However, using [`watch_file`](@ref) for this operation is preferred, since
-it is more reliable and efficient, although in some situations it may not be available.
+This is a thin wrapper over calling `wait` on a [`PollingFileWatcher`](@ref), which implements
+the functionality, but this function has a small race window between consecutive calls to
+`poll_file` where the file might change without being detected.
 """
 function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::Real=-1)
     pfw = PollingFileWatcher(s, Float64(interval_seconds))
@@ -893,12 +989,7 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R
                 close(pfw)
             end
         end
-        statdiff = wait(pfw)
-        if isa(statdiff[2], IOError)
-            # file didn't initially exist, continue watching for it to be created (or the error to change)
-            statdiff = wait(pfw)
-        end
-        return statdiff
+        return wait(pfw)
     finally
         close(pfw)
         @isdefined(timer) && close(timer)
@@ -908,4 +999,11 @@ end
 include("pidfile.jl")
 import .Pidfile: mkpidlock, trymkpidlock
 
+function __init__()
+    Base.mkpidlock_hook = mkpidlock
+    Base.trymkpidlock_hook = trymkpidlock
+    Base.parse_pidfile_hook = Pidfile.parse_pidfile
+    nothing
+end
+
 end
diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl
index 4c821a3d897e4..95b8f20face29 100644
--- a/stdlib/FileWatching/src/pidfile.jl
+++ b/stdlib/FileWatching/src/pidfile.jl
@@ -4,14 +4,14 @@ module Pidfile
 export mkpidlock, trymkpidlock
 
 using Base:
-    IOError, UV_EEXIST, UV_ESRCH,
+    IOError, UV_EEXIST, UV_ESRCH, UV_ENOENT,
     Process
 
 using Base.Filesystem:
     File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL,
     rename, samefile, path_separator
 
-using ..FileWatching: watch_file
+using ..FileWatching: FileMonitor
 using Base.Sys: iswindows
 
 """
@@ -256,19 +256,43 @@ function open_exclusive(path::String;
         end
     end
     # fall-back: wait for the lock
-
+    watch = Lockable(Core.Box(nothing))
     while true
-        # start the file-watcher prior to checking for the pidfile existence
-        t = @async try
-            watch_file(path, poll_interval)
+        # now try again to create it
+        # try to start the file-watcher prior to checking for the pidfile existence
+        watch = try
+            FileMonitor(path)
         catch ex
             isa(ex, IOError) || rethrow(ex)
-            sleep(poll_interval) # if the watch failed, convert to just doing a sleep
+            ex.code != UV_ENOENT # if the file was deleted in the meantime, don't sleep at all, even if the lock fails
+        end
+        timeout = nothing
+        if watch isa FileMonitor && stale_age > 0
+            let watch = watch
+                timeout = Timer(stale_age) do t
+                    close(watch)
+                end
+            end
+        end
+        try
+            file = tryopen_exclusive(path, mode)
+            file === nothing || return file
+            if watch isa FileMonitor
+                try
+                    Base.wait(watch) # will time-out after stale_age passes
+                catch ex
+                    isa(ex, EOFError) || isa(ex, IOError) || rethrow(ex)
+                end
+            end
+            if watch === true # if the watch failed, convert to just doing a sleep
+                sleep(poll_interval)
+            end
+        finally
+            # something changed about the path, so watch is now possibly monitoring the wrong file handle
+            # it will need to be recreated just before the next tryopen_exclusive attempt
+            timeout isa Timer && close(timeout)
+            watch isa FileMonitor && close(watch)
         end
-        # now try again to create it
-        file = tryopen_exclusive(path, mode)
-        file === nothing || return file
-        Base.wait(t) # sleep for a bit before trying again
         if stale_age > 0 && stale_pidfile(path, stale_age, refresh)
             # if the file seems stale, try to remove it before attempting again
             # set stale_age to zero so we won't attempt again, even if the attempt fails
diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl
index 2592aea024386..def555154264d 100644
--- a/stdlib/FileWatching/test/runtests.jl
+++ b/stdlib/FileWatching/test/runtests.jl
@@ -2,6 +2,7 @@
 
 using Test, FileWatching
 using Base: uv_error, Experimental
+using Base.Filesystem: StatStruct
 
 @testset "FileWatching" begin
 
@@ -168,12 +169,13 @@ file = joinpath(dir, "afile.txt")
 
 # initialize a watch_folder instance and create afile.txt
 function test_init_afile()
-    @test isempty(FileWatching.watched_folders)
+    watched_folders = FileWatching.watched_folders
+    @test @lock watched_folders isempty(watched_folders[])
     @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))
     @test @elapsed(@test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))) <= 0.5
-    @test length(FileWatching.watched_folders) == 1
+    @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1
     @test unwatch_folder(dir) === nothing
-    @test isempty(FileWatching.watched_folders)
+    @test @lock watched_folders isempty(watched_folders[])
     @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent())))
     @test 0.002 <= @elapsed(@test(watch_folder(dir, 0.004) == ("" => FileWatching.FileEvent()))) <= 0.5
     @test unwatch_folder(dir) === nothing
@@ -203,7 +205,7 @@ function test_init_afile()
     @test unwatch_folder(dir) === nothing
     @test(watch_folder(dir, 0) == ("" => FileWatching.FileEvent()))
     @test 0.9 <= @elapsed(@test(watch_folder(dir, 1) == ("" => FileWatching.FileEvent())))
-    @test length(FileWatching.watched_folders) == 1
+    @test @lock(watched_folders, length(FileWatching.watched_folders[])) == 1
     nothing
 end
 
@@ -218,7 +220,7 @@ function test_timeout(tval)
         @async test_file_poll(channel, 10, tval)
         tr = take!(channel)
     end
-    @test tr[1] === Base.Filesystem.StatStruct() && tr[2] === EOFError()
+    @test ispath(tr[1]::StatStruct) && tr[2] === EOFError()
     @test tval <= t_elapsed
 end
 
@@ -231,7 +233,7 @@ function test_touch(slval)
     write(f, "Hello World\n")
     close(f)
     tr = take!(channel)
-    @test ispath(tr[1]) && ispath(tr[2])
+    @test ispath(tr[1]::StatStruct) && ispath(tr[2]::StatStruct)
     fetch(t)
 end
 
@@ -435,11 +437,11 @@ end
 @test_throws(Base._UVError("FolderMonitor (start)", Base.UV_ENOENT),
              watch_folder("____nonexistent_file", 10))
 @test(@elapsed(
-    @test(poll_file("____nonexistent_file", 1, 3.1) ===
-          (Base.Filesystem.StatStruct(), EOFError()))) > 3)
+    @test(poll_file("____nonexistent_file", 1, 3.1) ==
+          (StatStruct(), EOFError()))) > 3)
 
 unwatch_folder(dir)
-@test isempty(FileWatching.watched_folders)
+@test @lock FileWatching.watched_folders isempty(FileWatching.watched_folders[])
 rm(file)
 rm(dir)
 
@@ -450,10 +452,6 @@ rm(dir)
     include("pidfile.jl")
 end
 
-@testset "Docstrings" begin
-    undoc = Docs.undocumented_names(FileWatching)
-    @test_broken isempty(undoc)
-    @test undoc == [:FDWatcher, :FileMonitor, :FolderMonitor, :PollingFileWatcher]
-end
+@test isempty(Docs.undocumented_names(FileWatching))
 
 end # testset
diff --git a/stdlib/InteractiveUtils/docs/src/index.md b/stdlib/InteractiveUtils/docs/src/index.md
index dbfb42b9a931d..69b68a27e4e81 100644
--- a/stdlib/InteractiveUtils/docs/src/index.md
+++ b/stdlib/InteractiveUtils/docs/src/index.md
@@ -33,5 +33,7 @@ InteractiveUtils.@code_llvm
 InteractiveUtils.code_native
 InteractiveUtils.@code_native
 InteractiveUtils.@time_imports
+InteractiveUtils.@trace_compile
+InteractiveUtils.@trace_dispatch
 InteractiveUtils.clipboard
 ```
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index 835988ddf149f..f3c1ff7fba59f 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -11,7 +11,7 @@ Base.Experimental.@optlevel 1
 
 export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, varinfo,
     versioninfo, subtypes, supertypes, @which, @edit, @less, @functionloc, @code_warntype,
-    @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard
+    @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard, @trace_compile, @trace_dispatch
 
 import Base.Docs.apropos
 
diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl
index bb56c47b4f9ca..211687df47954 100644
--- a/stdlib/InteractiveUtils/src/macros.jl
+++ b/stdlib/InteractiveUtils/src/macros.jl
@@ -256,6 +256,28 @@ macro time_imports(ex)
     end
 end
 
+macro trace_compile(ex)
+    quote
+        try
+            ccall(:jl_force_trace_compile_timing_enable, Cvoid, ())
+            $(esc(ex))
+        finally
+            ccall(:jl_force_trace_compile_timing_disable, Cvoid, ())
+        end
+    end
+end
+
+macro trace_dispatch(ex)
+    quote
+        try
+            ccall(:jl_force_trace_dispatch_enable, Cvoid, ())
+            $(esc(ex))
+        finally
+            ccall(:jl_force_trace_dispatch_disable, Cvoid, ())
+        end
+    end
+end
+
 """
     @functionloc
 
@@ -409,3 +431,36 @@ julia> @time_imports using CSV
 
 """
 :@time_imports
+
+"""
+    @trace_compile
+
+A macro to execute an expression and show any methods that were compiled (or recompiled in yellow),
+like the julia args `--trace-compile=stderr --trace-compile-timing` but specifically for a call.
+
+```julia-repl
+julia> @trace_compile rand(2,2) * rand(2,2)
+#=   39.1 ms =# precompile(Tuple{typeof(Base.rand), Int64, Int64})
+#=  102.0 ms =# precompile(Tuple{typeof(Base.:(*)), Array{Float64, 2}, Array{Float64, 2}})
+2×2 Matrix{Float64}:
+ 0.421704  0.864841
+ 0.211262  0.444366
+```
+
+!!! compat "Julia 1.12"
+    This macro requires at least Julia 1.12
+
+"""
+:@trace_compile
+
+"""
+    @trace_dispatch
+
+A macro to execute an expression and report methods that were compiled via dynamic dispatch,
+like the julia arg `--trace-dispatch=stderr` but specifically for a call.
+
+!!! compat "Julia 1.12"
+    This macro requires at least Julia 1.12
+
+"""
+:@trace_dispatch
diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl
index b000f353443c4..851391ec6c249 100644
--- a/stdlib/InteractiveUtils/test/runtests.jl
+++ b/stdlib/InteractiveUtils/test/runtests.jl
@@ -394,13 +394,21 @@ let errf = tempname(),
     try
         redirect_stderr(new_stderr)
         @test occursin("f_broken_code", sprint(code_native, h_broken_code, ()))
+        Libc.flush_cstdio()
         println(new_stderr, "start")
         flush(new_stderr)
-        @test_throws "could not compile the specified method" sprint(code_native, f_broken_code, ())
+        @test_throws "could not compile the specified method" sprint(io -> code_native(io, f_broken_code, (), dump_module=true))
         Libc.flush_cstdio()
-        println(new_stderr, "end")
+        println(new_stderr, "middle")
+        flush(new_stderr)
+        @test !isempty(sprint(io -> code_native(io, f_broken_code, (), dump_module=false)))
+        Libc.flush_cstdio()
+        println(new_stderr, "later")
         flush(new_stderr)
         @test invokelatest(g_broken_code) == 0
+        Libc.flush_cstdio()
+        println(new_stderr, "end")
+        flush(new_stderr)
     finally
         Libc.flush_cstdio()
         redirect_stderr(old_stderr)
@@ -410,6 +418,14 @@ let errf = tempname(),
                 Internal error: encountered unexpected error during compilation of f_broken_code:
                 ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
                 """) || errstr
+            @test occursin("""\nmiddle
+                Internal error: encountered unexpected error during compilation of f_broken_code:
+                ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
+                """, errstr) || errstr
+            @test occursin("""\nlater
+                Internal error: encountered unexpected error during compilation of f_broken_code:
+                ErrorException(\"unsupported or misplaced expression \\\"invalid\\\" in function f_broken_code\")
+                """, errstr) || errstr
             @test endswith(errstr, "\nend\n") || errstr
         end
         rm(errf)
@@ -531,9 +547,9 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH))
     output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test !occursin(rgx, output)
 
-    code_native(buf, linear_foo, ())
-    output = String(take!(buf))
-    @test occursin(rgx, output)
+    code_native(buf, linear_foo, (), debuginfo = :none)
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
+    @test !occursin(rgx, output)
 
     @testset "binary" begin
         # check the RET instruction (opcode: C3)
@@ -692,7 +708,7 @@ let
         length((@code_lowered sum(1:10)).code)
 end
 
-@testset "@time_imports" begin
+@testset "@time_imports, @trace_compile, @trace_dispatch" begin
     mktempdir() do dir
         cd(dir) do
             try
@@ -701,7 +717,16 @@ end
                 write(foo_file,
                     """
                     module Foo3242
-                    foo() = 1
+                    function foo()
+                        Base.Experimental.@force_compile
+                        foo(1)
+                    end
+                    foo(x) = x
+                    function bar()
+                        Base.Experimental.@force_compile
+                        bar(1)
+                    end
+                    bar(x) = x
                     end
                     """)
 
@@ -718,6 +743,27 @@ end
 
                 @test occursin("ms  Foo3242", String(buf))
 
+                fname = tempname()
+                f = open(fname, "w")
+                redirect_stderr(f) do
+                    @trace_compile @eval Foo3242.foo()
+                end
+                close(f)
+                buf = read(fname)
+                rm(fname)
+
+                @test occursin("ms =# precompile(", String(buf))
+
+                fname = tempname()
+                f = open(fname, "w")
+                redirect_stderr(f) do
+                    @trace_dispatch @eval Foo3242.bar()
+                end
+                close(f)
+                buf = read(fname)
+                rm(fname)
+
+                @test occursin("precompile(", String(buf))
             finally
                 filter!((≠)(dir), LOAD_PATH)
             end
diff --git a/stdlib/JuliaSyntaxHighlighting.version b/stdlib/JuliaSyntaxHighlighting.version
index 4a819d056e70e..280db66afe5f9 100644
--- a/stdlib/JuliaSyntaxHighlighting.version
+++ b/stdlib/JuliaSyntaxHighlighting.version
@@ -1,4 +1,4 @@
 JULIASYNTAXHIGHLIGHTING_BRANCH = main
-JULIASYNTAXHIGHLIGHTING_SHA1 = 4110caaf4fcdf0c614fd3ecd7c5bf589ca82ac63
+JULIASYNTAXHIGHLIGHTING_SHA1 = b89dd99db56700c47434df6106b6c6afd1c9ed01
 JULIASYNTAXHIGHLIGHTING_GIT_URL := https://github.com/julialang/JuliaSyntaxHighlighting.jl.git
 JULIASYNTAXHIGHLIGHTING_TAR_URL = https://api.github.com/repos/julialang/JuliaSyntaxHighlighting.jl/tarball/$1
diff --git a/stdlib/LLD_jll/Project.toml b/stdlib/LLD_jll/Project.toml
index 4f5e3a6659745..6a6cc72aa3c62 100644
--- a/stdlib/LLD_jll/Project.toml
+++ b/stdlib/LLD_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLD_jll"
 uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109"
-version = "17.0.6+4"
+version = "18.1.7+2"
 
 [deps]
 Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
@@ -10,7 +10,7 @@ Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
 julia = "1.11"
-libLLVM_jll = "17.0.6"
+libLLVM_jll = "18.1.7"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml
index 36c24111d4d31..0cb0fe5440066 100644
--- a/stdlib/LLVMLibUnwind_jll/Project.toml
+++ b/stdlib/LLVMLibUnwind_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LLVMLibUnwind_jll"
 uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
-version = "12.0.1+0"
+version = "14.0.6+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LibGit2/src/blame.jl b/stdlib/LibGit2/src/blame.jl
index 89071ea9c6f79..e441189bdd423 100644
--- a/stdlib/LibGit2/src/blame.jl
+++ b/stdlib/LibGit2/src/blame.jl
@@ -13,7 +13,7 @@ function GitBlame(repo::GitRepo, path::AbstractString; options::BlameOptions=Bla
     blame_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_blame_file, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{BlameOptions}),
-                   blame_ptr_ptr, repo.ptr, path, Ref(options))
+                   blame_ptr_ptr, repo, path, Ref(options))
     return GitBlame(repo, blame_ptr_ptr[])
 end
 
@@ -27,7 +27,7 @@ that function later.
 """
 function counthunks(blame::GitBlame)
     ensure_initialized()
-    return ccall((:git_blame_get_hunk_count, libgit2), Int32, (Ptr{Cvoid},), blame.ptr)
+    return ccall((:git_blame_get_hunk_count, libgit2), Int32, (Ptr{Cvoid},), blame)
 end
 
 function Base.getindex(blame::GitBlame, i::Integer)
diff --git a/stdlib/LibGit2/src/blob.jl b/stdlib/LibGit2/src/blob.jl
index 1941989b5f529..af1a16574b51e 100644
--- a/stdlib/LibGit2/src/blob.jl
+++ b/stdlib/LibGit2/src/blob.jl
@@ -2,7 +2,7 @@
 
 function Base.length(blob::GitBlob)
     ensure_initialized()
-    return ccall((:git_blob_rawsize, libgit2), Int64, (Ptr{Cvoid},), blob.ptr)
+    return ccall((:git_blob_rawsize, libgit2), Int64, (Ptr{Cvoid},), blob)
 end
 
 """
@@ -20,7 +20,7 @@ is binary and not valid Unicode.
 """
 function rawcontent(blob::GitBlob)
     ensure_initialized()
-    ptr = ccall((:git_blob_rawcontent, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob.ptr)
+    ptr = ccall((:git_blob_rawcontent, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), blob)
     copy(unsafe_wrap(Array, ptr, (length(blob),), own = false))
 end
 
@@ -47,7 +47,7 @@ the first 8000 bytes.
 """
 function isbinary(blob::GitBlob)
     ensure_initialized()
-    bin_flag = ccall((:git_blob_is_binary, libgit2), Cint, (Ptr{Cvoid},), blob.ptr)
+    bin_flag = ccall((:git_blob_is_binary, libgit2), Cint, (Ptr{Cvoid},), blob)
     return bin_flag == 1
 end
 
@@ -69,7 +69,7 @@ function addblob!(repo::GitRepo, path::AbstractString)
     id_ref = Ref{GitHash}()
     @check ccall((:git_blob_create_from_disk, libgit2), Cint,
                  (Ptr{GitHash}, Ptr{Cvoid}, Cstring),
-                 id_ref, repo.ptr, path)
+                 id_ref, repo, path)
     return id_ref[]
 end
 
diff --git a/stdlib/LibGit2/src/commit.jl b/stdlib/LibGit2/src/commit.jl
index ceb56ee45d3b7..d76a31791e4c4 100644
--- a/stdlib/LibGit2/src/commit.jl
+++ b/stdlib/LibGit2/src/commit.jl
@@ -73,16 +73,18 @@ function commit(repo::GitRepo,
     ensure_initialized()
     commit_id_ptr = Ref(GitHash())
     nparents = length(parents)
-    parentptrs = Ptr{Cvoid}[c.ptr for c in parents]
-    @check ccall((:git_commit_create, libgit2), Cint,
-                 (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8},
-                  Ptr{SignatureStruct}, Ptr{SignatureStruct},
-                  Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid},
-                  Csize_t, Ptr{Ptr{Cvoid}}),
-                 commit_id_ptr, repo.ptr, isempty(refname) ? C_NULL : refname,
-                 author.ptr, committer.ptr,
-                 C_NULL, msg, tree.ptr,
-                 nparents, nparents > 0 ? parentptrs : C_NULL)
+    GC.@preserve parents begin
+        parentptrs = Ptr{Cvoid}[c.ptr for c in parents]
+        @check ccall((:git_commit_create, libgit2), Cint,
+                     (Ptr{GitHash}, Ptr{Cvoid}, Ptr{UInt8},
+                      Ptr{SignatureStruct}, Ptr{SignatureStruct},
+                      Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid},
+                      Csize_t, Ptr{Ptr{Cvoid}}),
+                     commit_id_ptr, repo, isempty(refname) ? C_NULL : refname,
+                     author, committer,
+                     C_NULL, msg, tree,
+                     nparents, nparents > 0 ? parentptrs : C_NULL)
+    end
     return commit_id_ptr[]
 end
 
diff --git a/stdlib/LibGit2/src/config.jl b/stdlib/LibGit2/src/config.jl
index affe881abde08..0bee705259ca6 100644
--- a/stdlib/LibGit2/src/config.jl
+++ b/stdlib/LibGit2/src/config.jl
@@ -35,7 +35,7 @@ function GitConfig(repo::GitRepo)
     ensure_initialized()
     cfg_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_repository_config, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), cfg_ptr_ptr, repo)
     return GitConfig(repo, cfg_ptr_ptr[])
 end
 
@@ -58,7 +58,7 @@ function GitConfig(level::Consts.GIT_CONFIG = Consts.CONFIG_LEVEL_DEFAULT)
         try
             @check ccall((:git_config_open_level, libgit2), Cint,
                          (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint),
-                          glb_cfg_ptr_ptr, cfg.ptr, Cint(level))
+                          glb_cfg_ptr_ptr, cfg, Cint(level))
             cfg = GitConfig(glb_cfg_ptr_ptr[])
         finally
             close(tmpcfg)
@@ -91,13 +91,13 @@ function addfile(cfg::GitConfig, path::AbstractString,
     ensure_initialized()
     @static if LibGit2.VERSION >= v"0.27.0"
         @check ccall((:git_config_add_file_ondisk, libgit2), Cint,
-                     (Ptr{Ptr{Cvoid}}, Cstring, Cint, Ptr{Cvoid}, Cint),
-                     cfg.ptr, path, Cint(level), isa(repo, GitRepo) ? repo.ptr : C_NULL, Cint(force))
+                     (Ptr{Cvoid}, Cstring, Cint, Ptr{Cvoid}, Cint),
+                     cfg, path, Cint(level), isa(repo, GitRepo) ? repo : C_NULL, Cint(force))
     else
         repo === nothing || error("repo argument is not supported in this version of LibGit2")
         @check ccall((:git_config_add_file_ondisk, libgit2), Cint,
-                     (Ptr{Ptr{Cvoid}}, Cstring, Cint, Cint),
-                     cfg.ptr, path, Cint(level), Cint(force))
+                     (Ptr{Cvoid}, Cstring, Cint, Cint),
+                     cfg, path, Cint(level), Cint(force))
     end
 end
 
@@ -105,7 +105,7 @@ function get(::Type{<:AbstractString}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     buf_ref = Ref(Buffer())
     @check ccall((:git_config_get_string_buf, libgit2), Cint,
-                 (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c.ptr, name)
+                 (Ptr{Buffer}, Ptr{Cvoid}, Cstring), buf_ref, c, name)
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -116,7 +116,7 @@ function get(::Type{Bool}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cint(0))
     @check ccall((:git_config_get_bool, libgit2), Cint,
-          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return Bool(val_ptr[])
 end
 
@@ -124,7 +124,7 @@ function get(::Type{Int32}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cint(0))
     @check ccall((:git_config_get_int32, libgit2), Cint,
-          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+          (Ptr{Cint}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return val_ptr[]
 end
 
@@ -132,7 +132,7 @@ function get(::Type{Int64}, c::GitConfig, name::AbstractString)
     ensure_initialized()
     val_ptr = Ref(Cintmax_t(0))
     @check ccall((:git_config_get_int64, libgit2), Cint,
-          (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c.ptr, name)
+          (Ptr{Cintmax_t}, Ptr{Cvoid}, Cstring), val_ptr, c, name)
     return val_ptr[]
 end
 
@@ -165,33 +165,33 @@ end
 function set!(c::GitConfig, name::AbstractString, value::AbstractString)
     ensure_initialized()
     @check ccall((:git_config_set_string, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cstring), c.ptr, name, value)
+                  (Ptr{Cvoid}, Cstring, Cstring), c, name, value)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Bool)
     ensure_initialized()
     bval = Int32(value)
     @check ccall((:git_config_set_bool, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, bval)
+                  (Ptr{Cvoid}, Cstring, Cint), c, name, bval)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Int32)
     ensure_initialized()
     @check ccall((:git_config_set_int32, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cint), c.ptr, name, value)
+                  (Ptr{Cvoid}, Cstring, Cint), c, name, value)
 end
 
 function set!(c::GitConfig, name::AbstractString, value::Int64)
     ensure_initialized()
     @check ccall((:git_config_set_int64, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring, Cintmax_t), c.ptr, name, value)
+                  (Ptr{Cvoid}, Cstring, Cintmax_t), c, name, value)
 end
 
 function GitConfigIter(cfg::GitConfig)
     ensure_initialized()
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_iterator_new, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), ci_ptr, cfg)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -200,7 +200,7 @@ function GitConfigIter(cfg::GitConfig, name::AbstractString)
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_multivar_iterator_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                  ci_ptr, cfg.ptr, name, C_NULL)
+                  ci_ptr, cfg, name, C_NULL)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -209,7 +209,7 @@ function GitConfigIter(cfg::GitConfig, name::AbstractString, value::Regex)
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_multivar_iterator_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                  ci_ptr, cfg.ptr, name, value.pattern)
+                  ci_ptr, cfg, name, value.pattern)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -218,7 +218,7 @@ function GitConfigIter(cfg::GitConfig, name::Regex)
     ci_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_config_iterator_glob_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                  ci_ptr, cfg.ptr, name.pattern)
+                  ci_ptr, cfg, name.pattern)
     return GitConfigIter(ci_ptr[])
 end
 
@@ -226,7 +226,7 @@ function Base.iterate(ci::GitConfigIter, state=nothing)
     ensure_initialized()
     entry_ptr_ptr = Ref{Ptr{ConfigEntry}}(C_NULL)
     err = ccall((:git_config_next, libgit2), Cint,
-                 (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci.ptr)
+                 (Ptr{Ptr{ConfigEntry}}, Ptr{Cvoid}), entry_ptr_ptr, ci)
     if err == Cint(Error.GIT_OK)
         return (unsafe_load(entry_ptr_ptr[]), nothing)
     elseif err == Cint(Error.ITEROVER)
diff --git a/stdlib/LibGit2/src/diff.jl b/stdlib/LibGit2/src/diff.jl
index 044c6331dc1f1..a3f2cafe62e96 100644
--- a/stdlib/LibGit2/src/diff.jl
+++ b/stdlib/LibGit2/src/diff.jl
@@ -29,11 +29,11 @@ function diff_tree(repo::GitRepo, tree::GitTree, pathspecs::AbstractString=""; c
     if cached
         @check ccall((:git_diff_tree_to_index, libgit2), Cint,
                      (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                     diff_ptr_ptr, repo.ptr, tree.ptr, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs)
+                     diff_ptr_ptr, repo, tree, C_NULL, isempty(pathspecs) ? C_NULL : pathspecs)
     else
         @check ccall((:git_diff_tree_to_workdir_with_index, libgit2), Cint,
                      (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                     diff_ptr_ptr, repo.ptr, tree.ptr, isempty(pathspecs) ? C_NULL : pathspecs)
+                     diff_ptr_ptr, repo, tree, isempty(pathspecs) ? C_NULL : pathspecs)
     end
     return GitDiff(repo, diff_ptr_ptr[])
 end
@@ -53,7 +53,7 @@ function diff_tree(repo::GitRepo, oldtree::GitTree, newtree::GitTree)
     diff_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_diff_tree_to_tree, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{DiffOptionsStruct}),
-                   diff_ptr_ptr, repo.ptr, oldtree.ptr, newtree.ptr, C_NULL)
+                   diff_ptr_ptr, repo, oldtree, newtree, C_NULL)
     return GitDiff(repo, diff_ptr_ptr[])
 end
 
@@ -69,7 +69,7 @@ function GitDiffStats(diff::GitDiff)
     diff_stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_diff_get_stats, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}),
-                  diff_stat_ptr_ptr, diff.ptr)
+                  diff_stat_ptr_ptr, diff)
     return GitDiffStats(diff.owner, diff_stat_ptr_ptr[])
 end
 
@@ -83,7 +83,7 @@ are to be included or not).
 """
 function files_changed(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_files_changed, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_files_changed, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 """
@@ -96,7 +96,7 @@ are to be included or not).
 """
 function insertions(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_insertions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_insertions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 """
@@ -109,12 +109,12 @@ are to be included or not).
 """
 function deletions(diff_stat::GitDiffStats)
     ensure_initialized()
-    return ccall((:git_diff_stats_deletions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat.ptr)
+    return ccall((:git_diff_stats_deletions, libgit2), Csize_t, (Ptr{Cvoid},), diff_stat)
 end
 
 function count(diff::GitDiff)
     ensure_initialized()
-    return ccall((:git_diff_num_deltas, libgit2), Cint, (Ptr{Cvoid},), diff.ptr)
+    return ccall((:git_diff_num_deltas, libgit2), Cint, (Ptr{Cvoid},), diff)
 end
 
 function Base.getindex(diff::GitDiff, i::Integer)
@@ -122,10 +122,12 @@ function Base.getindex(diff::GitDiff, i::Integer)
         throw(BoundsError(diff, (i,)))
     end
     ensure_initialized()
-    delta_ptr = ccall((:git_diff_get_delta, libgit2),
-                      Ptr{DiffDelta},
-                      (Ptr{Cvoid}, Csize_t), diff.ptr, i-1)
-    return unsafe_load(delta_ptr)
+    GC.@preserve diff begin # preserve `diff` object until return of `unsafe_load`
+        delta_ptr = ccall((:git_diff_get_delta, libgit2),
+                          Ptr{DiffDelta},
+                          (Ptr{Cvoid}, Csize_t), diff, i-1)
+        return unsafe_load(delta_ptr)
+    end
 end
 
 function Base.show(io::IO, diff_stat::GitDiffStats)
diff --git a/stdlib/LibGit2/src/index.jl b/stdlib/LibGit2/src/index.jl
index 15e04d16b5756..81e8e75d59585 100644
--- a/stdlib/LibGit2/src/index.jl
+++ b/stdlib/LibGit2/src/index.jl
@@ -9,7 +9,7 @@ function GitIndex(repo::GitRepo)
     ensure_initialized()
     idx_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_repository_index, libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo.ptr)
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), idx_ptr_ptr, repo)
     return GitIndex(repo, idx_ptr_ptr[])
 end
 
@@ -25,7 +25,7 @@ has changed since the last time it was loaded into `idx`.
 """
 function read!(idx::GitIndex, force::Bool = false)
     ensure_initialized()
-    @check ccall((:git_index_read, libgit2), Cint, (Ptr{Cvoid}, Cint), idx.ptr, Cint(force))
+    @check ccall((:git_index_read, libgit2), Cint, (Ptr{Cvoid}, Cint), idx, Cint(force))
     return idx
 end
 
@@ -36,7 +36,7 @@ Write the state of index `idx` to disk using a file lock.
 """
 function write!(idx::GitIndex)
     ensure_initialized()
-    @check ccall((:git_index_write, libgit2), Cint, (Ptr{Cvoid},), idx.ptr)
+    @check ccall((:git_index_write, libgit2), Cint, (Ptr{Cvoid},), idx)
     return idx
 end
 
@@ -52,7 +52,7 @@ function write_tree!(idx::GitIndex)
     ensure_initialized()
     oid_ptr = Ref(GitHash())
     @check ccall((:git_index_write_tree, libgit2), Cint,
-                 (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx.ptr)
+                 (Ptr{GitHash}, Ptr{Cvoid}), oid_ptr, idx)
     return oid_ptr[]
 end
 
@@ -74,7 +74,7 @@ Read the tree `tree` (or the tree pointed to by `treehash` in the repository own
 function read_tree!(idx::GitIndex, tree::GitTree)
     ensure_initialized()
     @check ccall((:git_index_read_tree, libgit2), Cint,
-                 (Ptr{Cvoid}, Ptr{Cvoid}), idx.ptr, tree.ptr)
+                 (Ptr{Cvoid}, Ptr{Cvoid}), idx, tree)
 end
 read_tree!(idx::GitIndex, hash::AbstractGitHash) =
     read_tree!(idx, GitTree(repository(idx), hash))
@@ -106,7 +106,7 @@ function add!(idx::GitIndex, files::AbstractString...;
     ensure_initialized()
     @check ccall((:git_index_add_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Cuint, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), flags, C_NULL, C_NULL)
+                 idx, collect(files), flags, C_NULL, C_NULL)
 end
 
 """
@@ -122,7 +122,7 @@ function update!(idx::GitIndex, files::AbstractString...)
     ensure_initialized()
     @check ccall((:git_index_update_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), C_NULL, C_NULL)
+                 idx, collect(files), C_NULL, C_NULL)
 end
 
 """
@@ -136,7 +136,7 @@ function remove!(idx::GitIndex, files::AbstractString...)
     ensure_initialized()
     @check ccall((:git_index_remove_all, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 idx.ptr, collect(files), C_NULL, C_NULL)
+                 idx, collect(files), C_NULL, C_NULL)
 end
 
 function add!(repo::GitRepo, files::AbstractString...;
@@ -173,7 +173,7 @@ end
 
 function count(idx::GitIndex)
     ensure_initialized()
-    return ccall((:git_index_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), idx.ptr)
+    return ccall((:git_index_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), idx)
 end
 
 function Base.getindex(idx::GitIndex, i::Integer)
@@ -192,7 +192,7 @@ function Base.findall(path::String, idx::GitIndex)
     ensure_initialized()
     pos_ref = Ref{Csize_t}(0)
     ret = ccall((:git_index_find, libgit2), Cint,
-                  (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx.ptr, path)
+                  (Ref{Csize_t}, Ptr{Cvoid}, Cstring), pos_ref, idx, path)
     ret == Cint(Error.ENOTFOUND) && return nothing
     return pos_ref[]+1
 end
diff --git a/stdlib/LibGit2/src/merge.jl b/stdlib/LibGit2/src/merge.jl
index 7c946315fdd86..8bd8d1e4b64e9 100644
--- a/stdlib/LibGit2/src/merge.jl
+++ b/stdlib/LibGit2/src/merge.jl
@@ -18,7 +18,7 @@ function GitAnnotated(repo::GitRepo, commit_id::GitHash)
     ann_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_annotated_commit_lookup, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}),
-                   ann_ptr_ptr, repo.ptr, Ref(commit_id))
+                   ann_ptr_ptr, repo, Ref(commit_id))
     return GitAnnotated(repo, ann_ptr_ptr[])
 end
 
@@ -27,7 +27,7 @@ function GitAnnotated(repo::GitRepo, ref::GitReference)
     ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_annotated_commit_from_ref, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}),
-                   ann_ref_ref, repo.ptr, ref.ptr)
+                   ann_ref_ref, repo, ref)
     return GitAnnotated(repo, ann_ref_ref[])
 end
 
@@ -36,7 +36,7 @@ function GitAnnotated(repo::GitRepo, fh::FetchHead)
     ann_ref_ref = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_annotated_commit_from_fetchhead, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Ptr{GitHash}),
-                   ann_ref_ref, repo.ptr, fh.name, fh.url, Ref(fh.oid))
+                   ann_ref_ref, repo, fh.name, fh.url, Ref(fh.oid))
     return GitAnnotated(repo, ann_ref_ref[])
 end
 
@@ -88,9 +88,11 @@ function merge_analysis(repo::GitRepo, anns::Vector{GitAnnotated})
     preference = Ref{Cint}(0)
     anns_ref = Ref(Base.map(a->a.ptr, anns), 1)
     anns_size = Csize_t(length(anns))
-    @check ccall((:git_merge_analysis, libgit2), Cint,
-                  (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t),
-                   analysis, preference, repo.ptr, anns_ref, anns_size)
+    GC.@preserve anns begin
+        @check ccall((:git_merge_analysis, libgit2), Cint,
+                     (Ptr{Cint}, Ptr{Cint}, Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t),
+                     analysis, preference, repo, anns_ref, anns_size)
+    end
     return analysis[], preference[]
 end
 
@@ -147,11 +149,13 @@ function merge!(repo::GitRepo, anns::Vector{GitAnnotated};
                 checkout_opts::CheckoutOptions = CheckoutOptions())
     ensure_initialized()
     anns_size = Csize_t(length(anns))
-    @check ccall((:git_merge, libgit2), Cint,
-                  (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t,
-                   Ptr{MergeOptions}, Ptr{CheckoutOptions}),
-                   repo.ptr, Base.map(x->x.ptr, anns), anns_size,
-                   Ref(merge_opts), Ref(checkout_opts))
+    GC.@preserve anns begin
+        @check ccall((:git_merge, libgit2), Cint,
+                     (Ptr{Cvoid}, Ptr{Ptr{Cvoid}}, Csize_t,
+                      Ptr{MergeOptions}, Ptr{CheckoutOptions}),
+                     repo, Base.map(x->x.ptr, anns), anns_size,
+                     Ref(merge_opts), Ref(checkout_opts))
+    end
     @info "Review and commit merged changes"
     return true
 end
@@ -263,7 +267,7 @@ function merge_base(repo::GitRepo, one::AbstractString, two::AbstractString)
     moid = try
         @check ccall((:git_merge_base, libgit2), Cint,
                 (Ptr{GitHash}, Ptr{Cvoid}, Ptr{GitHash}, Ptr{GitHash}),
-                moid_ptr, repo.ptr, oid1_ptr, oid2_ptr)
+                moid_ptr, repo, oid1_ptr, oid2_ptr)
         moid_ptr[]
     catch e
         GitHash()
diff --git a/stdlib/LibGit2/src/oid.jl b/stdlib/LibGit2/src/oid.jl
index be4944791f55c..fae0d3737a429 100644
--- a/stdlib/LibGit2/src/oid.jl
+++ b/stdlib/LibGit2/src/oid.jl
@@ -133,7 +133,7 @@ function GitHash(repo::GitRepo, ref_name::AbstractString)
     oid_ptr  = Ref(GitHash())
     @check ccall((:git_reference_name_to_id, libgit2), Cint,
                     (Ptr{GitHash}, Ptr{Cvoid}, Cstring),
-                     oid_ptr, repo.ptr, ref_name)
+                     oid_ptr, repo, ref_name)
     return oid_ptr[]
 end
 
@@ -144,7 +144,7 @@ Get the identifier (`GitHash`) of `obj`.
 """
 function GitHash(obj::GitObject)
     ensure_initialized()
-    GitHash(ccall((:git_object_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj.ptr))
+    GitHash(ccall((:git_object_id, libgit2), Ptr{UInt8}, (Ptr{Cvoid},), obj))
 end
 
 ==(obj1::GitObject, obj2::GitObject) = GitHash(obj1) == GitHash(obj2)
@@ -160,7 +160,7 @@ function GitShortHash(obj::GitObject)
     ensure_initialized()
     buf_ref = Ref(Buffer())
     @check ccall((:git_object_short_id, libgit2), Cint,
-                 (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj.ptr)
+                 (Ptr{Buffer},Ptr{Cvoid}), buf_ref, obj)
     sid = GitShortHash(buf_ref[])
     free(buf_ref)
     return sid
diff --git a/stdlib/LibGit2/src/rebase.jl b/stdlib/LibGit2/src/rebase.jl
index b36c2f3f475cf..e4abf5a85cc92 100644
--- a/stdlib/LibGit2/src/rebase.jl
+++ b/stdlib/LibGit2/src/rebase.jl
@@ -8,14 +8,14 @@ function GitRebase(repo::GitRepo, branch::GitAnnotated, upstream::GitAnnotated;
     @check ccall((:git_rebase_init, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid},
                    Ptr{Cvoid}, Ptr{RebaseOptions}),
-                   rebase_ptr_ptr, repo.ptr, branch.ptr, upstream.ptr,
-                   onto === nothing ? C_NULL : onto.ptr, Ref(opts))
+                   rebase_ptr_ptr, repo, branch, upstream,
+                   onto === nothing ? C_NULL : onto, Ref(opts))
     return GitRebase(repo, rebase_ptr_ptr[])
 end
 
 function count(rb::GitRebase)
     ensure_initialized()
-    return ccall((:git_rebase_operation_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr)
+    return ccall((:git_rebase_operation_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), rb)
 end
 
 """
@@ -28,7 +28,7 @@ has not yet been called or iteration over `rb` has not yet begun), return
 """
 function current(rb::GitRebase)
     ensure_initialized()
-    return ccall((:git_rebase_operation_current, libgit2), Csize_t, (Ptr{Cvoid},), rb.ptr)
+    return ccall((:git_rebase_operation_current, libgit2), Csize_t, (Ptr{Cvoid},), rb)
 end
 
 function Base.getindex(rb::GitRebase, i::Integer)
@@ -80,7 +80,7 @@ function commit(rb::GitRebase, sig::GitSignature)
     try
         @check ccall((:git_rebase_commit, libgit2), Error.Code,
                      (Ptr{GitHash}, Ptr{Cvoid}, Ptr{SignatureStruct}, Ptr{SignatureStruct}, Ptr{UInt8}, Ptr{UInt8}),
-                      oid_ptr, rb.ptr, C_NULL, sig.ptr, C_NULL, C_NULL)
+                      oid_ptr, rb, C_NULL, sig, C_NULL, C_NULL)
     catch err
         # TODO: return current HEAD instead
         err isa GitError && err.code === Error.EAPPLIED && return nothing
@@ -101,7 +101,7 @@ rebase had completed), and `-1` for other errors.
 function abort(rb::GitRebase)
     ensure_initialized()
     return ccall((:git_rebase_abort, libgit2), Csize_t,
-                      (Ptr{Cvoid},), rb.ptr)
+                      (Ptr{Cvoid},), rb)
 end
 
 """
@@ -115,5 +115,5 @@ function finish(rb::GitRebase, sig::GitSignature)
     ensure_initialized()
     return ccall((:git_rebase_finish, libgit2), Csize_t,
                   (Ptr{Cvoid}, Ptr{SignatureStruct}),
-                   rb.ptr, sig.ptr)
+                   rb, sig)
 end
diff --git a/stdlib/LibGit2/src/reference.jl b/stdlib/LibGit2/src/reference.jl
index 9f849ed01a00f..8a9bc5cf1a6de 100644
--- a/stdlib/LibGit2/src/reference.jl
+++ b/stdlib/LibGit2/src/reference.jl
@@ -5,7 +5,7 @@ function GitReference(repo::GitRepo, refname::AbstractString)
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_lookup, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                   ref_ptr_ptr, repo.ptr, refname)
+                   ref_ptr_ptr, repo, refname)
     return GitReference(repo, ref_ptr_ptr[])
 end
 
@@ -15,7 +15,7 @@ function GitReference(repo::GitRepo, obj_oid::GitHash, refname::AbstractString =
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_create, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{GitHash}, Cint, Cstring),
-                   ref_ptr_ptr, repo.ptr, refname, Ref(obj_oid), Cint(force),
+                   ref_ptr_ptr, repo, refname, Ref(obj_oid), Cint(force),
                    isempty(msg) ? C_NULL : msg)
     return GitReference(repo, ref_ptr_ptr[])
 end
@@ -29,7 +29,7 @@ to this branch will have no parents.
 function isorphan(repo::GitRepo)
     ensure_initialized()
     r = @check ccall((:git_repository_head_unborn, libgit2), Cint,
-                     (Ptr{Cvoid},), repo.ptr)
+                     (Ptr{Cvoid},), repo)
     r != 0
 end
 
@@ -42,7 +42,7 @@ function head(repo::GitRepo)
     ensure_initialized()
     head_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_repository_head, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), head_ptr_ptr, repo)
     return GitReference(repo, head_ptr_ptr[])
 end
 
@@ -68,7 +68,7 @@ function shortname(ref::GitReference)
     isempty(ref) && return ""
     ensure_initialized()
     GC.@preserve ref begin
-        name_ptr = ccall((:git_reference_shorthand, libgit2), Cstring, (Ptr{Cvoid},), ref.ptr)
+        name_ptr = ccall((:git_reference_shorthand, libgit2), Cstring, (Ptr{Cvoid},), ref)
         name_ptr == C_NULL && return ""
         name = unsafe_string(name_ptr)
     end
@@ -85,7 +85,7 @@ Return a `Cint` corresponding to the type of `ref`:
 """
 function reftype(ref::GitReference)
     ensure_initialized()
-    return ccall((:git_reference_type, libgit2), Cint, (Ptr{Cvoid},), ref.ptr)
+    return ccall((:git_reference_type, libgit2), Cint, (Ptr{Cvoid},), ref)
 end
 
 """
@@ -139,7 +139,7 @@ function ishead(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_branch_is_head, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -147,7 +147,7 @@ function isbranch(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_reference_is_branch, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -155,7 +155,7 @@ function istag(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_reference_is_tag, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -163,7 +163,7 @@ function isremote(ref::GitReference)
     isempty(ref) && return false
     ensure_initialized()
     err = ccall((:git_reference_is_remote, libgit2), Cint,
-                  (Ptr{Cvoid},), ref.ptr)
+                  (Ptr{Cvoid},), ref)
     return err == 1
 end
 
@@ -200,7 +200,7 @@ function peel(::Type{T}, ref::GitReference) where T<:GitObject
     ensure_initialized()
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_peel, libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref.ptr, Consts.OBJECT(T))
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), obj_ptr_ptr, ref, Consts.OBJECT(T))
     return T(ref.owner, obj_ptr_ptr[])
 end
 peel(ref::GitReference) = peel(GitObject, ref)
@@ -214,7 +214,7 @@ function ref_list(repo::GitRepo)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_reference_list, libgit2), Cint,
-                      (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
+                      (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -237,7 +237,7 @@ function create_branch(repo::GitRepo,
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_branch_create, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Cint),
-                   ref_ptr_ptr, repo.ptr, bname, commit_obj.ptr, Cint(force))
+                   ref_ptr_ptr, repo, bname, commit_obj, Cint(force))
     return GitReference(repo, ref_ptr_ptr[])
 end
 
@@ -248,7 +248,7 @@ Delete the branch pointed to by `branch`.
 """
 function delete_branch(branch::GitReference)
     ensure_initialized()
-    @check ccall((:git_branch_delete, libgit2), Cint, (Ptr{Cvoid},), branch.ptr)
+    @check ccall((:git_branch_delete, libgit2), Cint, (Ptr{Cvoid},), branch)
 end
 
 """
@@ -260,7 +260,7 @@ function head!(repo::GitRepo, ref::GitReference)
     ensure_initialized()
     ref_name = name(ref)
     @check ccall((:git_repository_set_head, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring), repo.ptr, ref_name)
+                  (Ptr{Cvoid}, Cstring), repo, ref_name)
     return ref
 end
 
@@ -282,7 +282,7 @@ function lookup_branch(repo::GitRepo,
     branch_type = remote ? Consts.BRANCH_REMOTE : Consts.BRANCH_LOCAL
     err = ccall((:git_branch_lookup, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{UInt8}, Cint),
-                  ref_ptr_ptr, repo.ptr, branch_name, branch_type)
+                  ref_ptr_ptr, repo, branch_name, branch_type)
     if err != Int(Error.GIT_OK)
         if err == Int(Error.ENOTFOUND)
             return nothing
@@ -308,7 +308,7 @@ function upstream(ref::GitReference)
     ensure_initialized()
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     err = ccall((:git_branch_upstream, libgit2), Cint,
-                  (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref.ptr)
+                  (Ref{Ptr{Cvoid}}, Ptr{Cvoid},), ref_ptr_ptr, ref)
     if err != Int(Error.GIT_OK)
         if err == Int(Error.ENOTFOUND)
             return nothing
@@ -328,7 +328,7 @@ function target!(ref::GitReference, new_oid::GitHash; msg::AbstractString="")
     ref_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_reference_set_target, libgit2), Cint,
              (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Cstring),
-             ref_ptr_ptr, ref.ptr, Ref(new_oid), isempty(msg) ? C_NULL : msg)
+             ref_ptr_ptr, ref, Ref(new_oid), isempty(msg) ? C_NULL : msg)
     return GitReference(ref.owner, ref_ptr_ptr[])
 end
 
@@ -336,7 +336,7 @@ function GitBranchIter(repo::GitRepo, flags::Cint=Cint(Consts.BRANCH_LOCAL))
     ensure_initialized()
     bi_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_branch_iterator_new, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo.ptr, flags)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), bi_ptr, repo, flags)
     return GitBranchIter(repo, bi_ptr[])
 end
 
@@ -346,7 +346,7 @@ function Base.iterate(bi::GitBranchIter, state=nothing)
     btype = Ref{Cint}()
     err = ccall((:git_branch_next, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cint}, Ptr{Cvoid}),
-                  ref_ptr_ptr, btype, bi.ptr)
+                  ref_ptr_ptr, btype, bi)
     if err == Cint(Error.GIT_OK)
         return ((GitReference(bi.owner, ref_ptr_ptr[]), btype[]), nothing)
     elseif err == Cint(Error.ITEROVER)
diff --git a/stdlib/LibGit2/src/remote.jl b/stdlib/LibGit2/src/remote.jl
index 07afecebfd373..5081eff56dd46 100644
--- a/stdlib/LibGit2/src/remote.jl
+++ b/stdlib/LibGit2/src/remote.jl
@@ -16,7 +16,7 @@ function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractStr
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_create, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url)
+                rmt_ptr_ptr, repo, rmt_name, rmt_url)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -39,7 +39,7 @@ function GitRemote(repo::GitRepo, rmt_name::AbstractString, rmt_url::AbstractStr
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_create_with_fetchspec, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name, rmt_url, fetch_spec)
+                rmt_ptr_ptr, repo, rmt_name, rmt_url, fetch_spec)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -59,7 +59,7 @@ function GitRemoteAnon(repo::GitRepo, url::AbstractString)
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_create_anonymous, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, url)
+                rmt_ptr_ptr, repo, url)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -95,7 +95,7 @@ function lookup_remote(repo::GitRepo, remote_name::AbstractString)
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     err = ccall((:git_remote_lookup, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, remote_name)
+                rmt_ptr_ptr, repo, remote_name)
     if err == Int(Error.GIT_OK)
         return GitRemote(repo, rmt_ptr_ptr[])
     elseif err == Int(Error.ENOTFOUND)
@@ -110,7 +110,7 @@ function get(::Type{GitRemote}, repo::GitRepo, rmt_name::AbstractString)
     rmt_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_remote_lookup, libgit2), Cint,
                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring),
-                rmt_ptr_ptr, repo.ptr, rmt_name)
+                rmt_ptr_ptr, repo, rmt_name)
     return GitRemote(repo, rmt_ptr_ptr[])
 end
 
@@ -133,9 +133,11 @@ julia> LibGit2.url(remote)
 """
 function url(rmt::GitRemote)
     ensure_initialized()
-    url_ptr = ccall((:git_remote_url, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    url_ptr == C_NULL && return ""
-    return unsafe_string(url_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        url_ptr = ccall((:git_remote_url, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        url_ptr == C_NULL && return ""
+        return unsafe_string(url_ptr)
+    end
 end
 
 """
@@ -157,9 +159,11 @@ julia> LibGit2.push_url(LibGit2.get(LibGit2.GitRemote, repo, "origin"))
 """
 function push_url(rmt::GitRemote)
     ensure_initialized()
-    url_ptr = ccall((:git_remote_pushurl, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    url_ptr == C_NULL && return ""
-    return unsafe_string(url_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        url_ptr = ccall((:git_remote_pushurl, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        url_ptr == C_NULL && return ""
+        return unsafe_string(url_ptr)
+    end
 end
 
 """
@@ -183,9 +187,11 @@ julia> name(remote)
 """
 function name(rmt::GitRemote)
     ensure_initialized()
-    name_ptr = ccall((:git_remote_name, libgit2), Cstring, (Ptr{Cvoid},), rmt.ptr)
-    name_ptr == C_NULL && return ""
-    return unsafe_string(name_ptr)
+    GC.@preserve rmt begin # preserve `rmt` object until return of `unsafe_string`
+        name_ptr = ccall((:git_remote_name, libgit2), Cstring, (Ptr{Cvoid},), rmt)
+        name_ptr == C_NULL && return ""
+        return unsafe_string(name_ptr)
+    end
 end
 
 """
@@ -208,7 +214,7 @@ function fetch_refspecs(rmt::GitRemote)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_remote_get_fetch_refspecs, libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr)
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -238,7 +244,7 @@ function push_refspecs(rmt::GitRemote)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_remote_get_push_refspecs, libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt.ptr)
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, rmt)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -261,7 +267,7 @@ String["+refs/heads/*:refs/remotes/upstream/*"]
 function add_fetch!(repo::GitRepo, rmt::GitRemote, fetch_spec::String)
     ensure_initialized()
     @check ccall((:git_remote_add_fetch, libgit2), Cint,
-                 (Ptr{Cvoid}, Cstring, Cstring), repo.ptr,
+                 (Ptr{Cvoid}, Cstring, Cstring), repo,
                  name(rmt), fetch_spec)
 end
 
@@ -290,7 +296,7 @@ String["refs/heads/master"]
 function add_push!(repo::GitRepo, rmt::GitRemote, push_spec::String)
     ensure_initialized()
     @check ccall((:git_remote_add_push, libgit2), Cint,
-                 (Ptr{Cvoid}, Cstring, Cstring), repo.ptr,
+                 (Ptr{Cvoid}, Cstring, Cstring), repo,
                  name(rmt), push_spec)
 end
 
@@ -311,7 +317,7 @@ function fetch(rmt::GitRemote, refspecs::Vector{<:AbstractString};
     msg = "libgit2.fetch: $msg"
     @check ccall((:git_remote_fetch, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{FetchOptions}, Cstring),
-                 rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg)
+                 rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options), msg)
 end
 
 """
@@ -336,7 +342,7 @@ function push(rmt::GitRemote, refspecs::Vector{<:AbstractString};
     ensure_initialized()
     @check ccall((:git_remote_push, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{StrArrayStruct}, Ptr{PushOptions}),
-                 rmt.ptr, isempty(refspecs) ? C_NULL : refspecs, Ref(options))
+                 rmt, isempty(refspecs) ? C_NULL : refspecs, Ref(options))
 end
 
 """
@@ -348,7 +354,7 @@ function remote_delete(repo::GitRepo, remote_name::AbstractString)
     ensure_initialized()
     @check ccall((:git_remote_delete, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring),
-                 repo.ptr, remote_name)
+                 repo, remote_name)
 end
 
 Base.show(io::IO, rmt::GitRemote) = print(io, "GitRemote:\nRemote name: ", name(rmt), " url: ", url(rmt))
@@ -367,7 +373,7 @@ function set_remote_fetch_url(repo::GitRepo, remote_name::AbstractString, url::A
     ensure_initialized()
     @check ccall((:git_remote_set_url, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring, Cstring),
-                 repo.ptr, remote_name, url)
+                 repo, remote_name, url)
 end
 
 function set_remote_fetch_url(path::AbstractString, remote_name::AbstractString, url::AbstractString)
@@ -390,7 +396,7 @@ function set_remote_push_url(repo::GitRepo, remote_name::AbstractString, url::Ab
     ensure_initialized()
     @check ccall((:git_remote_set_pushurl, libgit2), Cint,
                  (Ptr{Cvoid}, Cstring, Cstring),
-                 repo.ptr, remote_name, url)
+                 repo, remote_name, url)
 end
 
 function set_remote_push_url(path::AbstractString, remote_name::AbstractString, url::AbstractString)
@@ -432,7 +438,7 @@ function connect(rmt::GitRemote, direction::Consts.GIT_DIRECTION,
                  callbacks::RemoteCallbacks)
     @check ccall((:git_remote_connect, libgit2),
                  Cint, (Ptr{Cvoid}, Cint, Ref{RemoteCallbacks}, Ptr{Cvoid}, Ptr{Cvoid}),
-                 rmt.ptr, direction, callbacks, C_NULL, C_NULL)
+                 rmt, direction, callbacks, C_NULL, C_NULL)
     return rmt
 end
 
@@ -442,7 +448,7 @@ end
 Check whether the remote is connected
 """
 function connected(rmt::GitRemote)
-    return ccall((:git_remote_connected, libgit2), Cint, (Ptr{Cvoid},), rmt.ptr) != 0
+    return ccall((:git_remote_connected, libgit2), Cint, (Ptr{Cvoid},), rmt) != 0
 end
 
 """
@@ -451,7 +457,7 @@ end
 Close the connection to the remote.
 """
 function disconnect(rmt::GitRemote)
-    @check ccall((:git_remote_disconnect, libgit2), Cint, (Ptr{Cvoid},), rmt.ptr)
+    @check ccall((:git_remote_disconnect, libgit2), Cint, (Ptr{Cvoid},), rmt)
     return
 end
 
@@ -465,7 +471,7 @@ This function must only be called after connecting (See [`connect`](@ref)).
 function default_branch(rmt::GitRemote)
     buf_ref = Ref(Buffer())
     @check ccall((:git_remote_default_branch, libgit2), Cint,
-                 (Ptr{Buffer}, Ptr{Cvoid}), buf_ref, rmt.ptr)
+                 (Ptr{Buffer}, Ptr{Cvoid}), buf_ref, rmt)
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -484,7 +490,7 @@ function ls(rmt::GitRemote)
     head_refs = Ref{Ptr{Ptr{_GitRemoteHead}}}()
     @check ccall((:git_remote_ls, libgit2), Cint,
                  (Ptr{Ptr{Ptr{_GitRemoteHead}}}, Ptr{Csize_t}, Ptr{Cvoid}),
-                 head_refs, nheads, rmt.ptr)
+                 head_refs, nheads, rmt)
     head_ptr = head_refs[]
     return [GitRemoteHead(unsafe_load(unsafe_load(head_ptr, i)))
             for i in 1:nheads[]]
diff --git a/stdlib/LibGit2/src/repository.jl b/stdlib/LibGit2/src/repository.jl
index 8297ae92a6a00..192a6870f639b 100644
--- a/stdlib/LibGit2/src/repository.jl
+++ b/stdlib/LibGit2/src/repository.jl
@@ -32,7 +32,7 @@ end
 function cleanup(r::GitRepo)
     if r.ptr != C_NULL
         ensure_initialized()
-        @check ccall((:git_repository__cleanup, libgit2), Cint, (Ptr{Cvoid},), r.ptr)
+        @check ccall((:git_repository__cleanup, libgit2), Cint, (Ptr{Cvoid},), r)
     end
 end
 
@@ -97,7 +97,7 @@ tree, and no tracking information for remote branches or configurations is prese
 function isbare(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    return ccall((:git_repository_is_bare, libgit2), Cint, (Ptr{Cvoid},), repo.ptr) == 1
+    return ccall((:git_repository_is_bare, libgit2), Cint, (Ptr{Cvoid},), repo) == 1
 end
 
 """
@@ -109,7 +109,7 @@ Determine if `repo` is detached - that is, whether its HEAD points to a commit
 function isattached(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    ccall((:git_repository_head_detached, libgit2), Cint, (Ptr{Cvoid},), repo.ptr) != 1
+    ccall((:git_repository_head_detached, libgit2), Cint, (Ptr{Cvoid},), repo) != 1
 end
 
 @doc """
@@ -140,13 +140,20 @@ function (::Type{T})(repo::GitRepo, spec::AbstractString) where T<:GitObject
     obj_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @assert repo.ptr != C_NULL
     @check ccall((:git_revparse_single, libgit2), Cint,
-                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo.ptr, spec)
+                 (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring), obj_ptr_ptr, repo, spec)
+    obj_ptr = obj_ptr_ptr[]
     # check object is of correct type
     if T != GitObject && T != GitUnknownObject
-        t = Consts.OBJECT(obj_ptr_ptr[])
-        t == Consts.OBJECT(T) || throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))"))
+        t = Consts.OBJECT(obj_ptr)
+        if t != Consts.OBJECT(T)
+            if obj_ptr != C_NULL
+                # free result
+                ccall((:git_object_free, libgit2), Cvoid, (Ptr{Cvoid},), obj_ptr)
+            end
+            throw(GitError(Error.Object, Error.ERROR, "Expected object of type $T, received object of type $(objtype(t))"))
+        end
     end
-    return T(repo, obj_ptr_ptr[])
+    return T(repo, obj_ptr)
 end
 
 function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject
@@ -157,7 +164,7 @@ function (::Type{T})(repo::GitRepo, oid::GitHash) where T<:GitObject
     @assert repo.ptr != C_NULL
     @check ccall((:git_object_lookup, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Consts.OBJECT),
-                 obj_ptr_ptr, repo.ptr, oid_ptr, Consts.OBJECT(T))
+                 obj_ptr_ptr, repo, oid_ptr, Consts.OBJECT(T))
 
     return T(repo, obj_ptr_ptr[])
 end
@@ -169,7 +176,7 @@ function (::Type{T})(repo::GitRepo, oid::GitShortHash) where T<:GitObject
     @assert repo.ptr != C_NULL
     @check ccall((:git_object_lookup_prefix, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{GitHash}, Csize_t, Consts.OBJECT),
-                 obj_ptr_ptr, repo.ptr, oid_ptr, oid.len, Consts.OBJECT(T))
+                 obj_ptr_ptr, repo, oid_ptr, oid.len, Consts.OBJECT(T))
 
     return T(repo, obj_ptr_ptr[])
 end
@@ -190,8 +197,10 @@ See also [`workdir`](@ref), [`path`](@ref).
 function gitdir(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    return unsafe_string(ccall((:git_repository_path, libgit2), Cstring,
-                        (Ptr{Cvoid},), repo.ptr))
+    GC.@preserve repo begin
+        return unsafe_string(ccall((:git_repository_path, libgit2), Cstring,
+                                   (Ptr{Cvoid},), repo))
+    end
 end
 
 """
@@ -211,10 +220,12 @@ See also [`gitdir`](@ref), [`path`](@ref).
 function workdir(repo::GitRepo)
     ensure_initialized()
     @assert repo.ptr != C_NULL
-    sptr = ccall((:git_repository_workdir, libgit2), Cstring,
-                (Ptr{Cvoid},), repo.ptr)
-    sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found."))
-    return unsafe_string(sptr)
+    GC.@preserve repo begin
+        sptr = ccall((:git_repository_workdir, libgit2), Cstring,
+                     (Ptr{Cvoid},), repo)
+        sptr == C_NULL && throw(GitError(Error.Object, Error.ERROR, "No working directory found."))
+        return unsafe_string(sptr)
+    end
 end
 
 """
@@ -256,7 +267,7 @@ function peel(::Type{T}, obj::GitObject) where T<:GitObject
     new_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
 
     @check ccall((:git_object_peel, libgit2), Cint,
-                (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj.ptr, Consts.OBJECT(T))
+                (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cint), new_ptr_ptr, obj, Consts.OBJECT(T))
 
     return T(obj.owner, new_ptr_ptr[])
 end
@@ -287,7 +298,7 @@ function GitDescribeResult(committish::GitObject;
     result_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_describe_commit, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}),
-                 result_ptr_ptr, committish.ptr, Ref(options))
+                 result_ptr_ptr, committish, Ref(options))
     return GitDescribeResult(committish.owner, result_ptr_ptr[])
 end
 
@@ -314,7 +325,7 @@ function GitDescribeResult(repo::GitRepo; options::DescribeOptions=DescribeOptio
     @assert repo.ptr != C_NULL
     @check ccall((:git_describe_workdir, libgit2), Cint,
                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{DescribeOptions}),
-                 result_ptr_ptr, repo.ptr, Ref(options))
+                 result_ptr_ptr, repo, Ref(options))
     return GitDescribeResult(repo, result_ptr_ptr[])
 end
 
@@ -331,7 +342,7 @@ function format(result::GitDescribeResult; options::DescribeFormatOptions=Descri
     buf_ref = Ref(Buffer())
     @check ccall((:git_describe_format, libgit2), Cint,
                  (Ptr{Buffer}, Ptr{Cvoid}, Ptr{DescribeFormatOptions}),
-                 buf_ref, result.ptr, Ref(options))
+                 buf_ref, result, Ref(options))
     buf = buf_ref[]
     str = unsafe_string(buf.ptr, buf.size)
     free(buf_ref)
@@ -357,7 +368,7 @@ function checkout_tree(repo::GitRepo, obj::GitObject;
     @assert repo.ptr != C_NULL
     @check ccall((:git_checkout_tree, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr, obj.ptr, Ref(options))
+                 repo, obj, Ref(options))
 end
 
 """
@@ -373,8 +384,8 @@ function checkout_index(repo::GitRepo, idx::Union{GitIndex, Nothing} = nothing;
     @assert repo.ptr != C_NULL
     @check ccall((:git_checkout_index, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr,
-                 idx === nothing ? C_NULL : idx.ptr,
+                 repo,
+                 idx === nothing ? C_NULL : idx,
                  Ref(options))
 end
 
@@ -393,7 +404,7 @@ function checkout_head(repo::GitRepo; options::CheckoutOptions = CheckoutOptions
     @assert repo.ptr != C_NULL
     @check ccall((:git_checkout_head, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{CheckoutOptions}),
-                 repo.ptr, Ref(options))
+                 repo, Ref(options))
 end
 
 """
@@ -412,7 +423,7 @@ function cherrypick(repo::GitRepo, commit::GitCommit; options::CherrypickOptions
     @assert repo.ptr != C_NULL
     @check ccall((:git_cherrypick, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{CherrypickOptions}),
-                 repo.ptr, commit.ptr, Ref(options))
+                 repo, commit, Ref(options))
 end
 
 """Updates some entries, determined by the `pathspecs`, in the index from the target commit tree."""
@@ -421,8 +432,8 @@ function reset!(repo::GitRepo, obj::Union{GitObject, Nothing}, pathspecs::Abstra
     @assert repo.ptr != C_NULL
     @check ccall((:git_reset_default, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{StrArrayStruct}),
-                 repo.ptr,
-                 obj === nothing ? C_NULL : obj.ptr,
+                 repo,
+                 obj === nothing ? C_NULL : obj,
                  collect(pathspecs))
     return head_oid(repo)
 end
@@ -434,7 +445,7 @@ function reset!(repo::GitRepo, obj::GitObject, mode::Cint;
     @assert repo.ptr != C_NULL
     @check ccall((:git_reset, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Cint, Ptr{CheckoutOptions}),
-                  repo.ptr, obj.ptr, mode, Ref(checkout_opts))
+                  repo, obj, mode, Ref(checkout_opts))
     return head_oid(repo)
 end
 
@@ -492,7 +503,7 @@ function fetchheads(repo::GitRepo)
     @assert repo.ptr != C_NULL
     @check ccall((:git_repository_fetchhead_foreach, libgit2), Cint,
                  (Ptr{Cvoid}, Ptr{Cvoid}, Any),
-                 repo.ptr, ffcb, fh)
+                 repo, ffcb, fh)
     return fh
 end
 
@@ -506,7 +517,7 @@ function remotes(repo::GitRepo)
     sa_ref = Ref(StrArrayStruct())
     @assert repo.ptr != C_NULL
     @check ccall((:git_remote_list, libgit2), Cint,
-                  (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
+                  (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     return res
diff --git a/stdlib/LibGit2/src/signature.jl b/stdlib/LibGit2/src/signature.jl
index 85e62cd8c2b7e..17013121db9ad 100644
--- a/stdlib/LibGit2/src/signature.jl
+++ b/stdlib/LibGit2/src/signature.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 function Signature(ptr::Ptr{SignatureStruct})
+    @assert ptr != C_NULL
     sig   = unsafe_load(ptr)::SignatureStruct
     name  = unsafe_string(sig.name)
     email = unsafe_string(sig.email)
@@ -67,6 +68,6 @@ function default_signature(repo::GitRepo)
     ensure_initialized()
     sig_ptr_ptr = Ref{Ptr{SignatureStruct}}(C_NULL)
     @check ccall((:git_signature_default, libgit2), Cint,
-                 (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo.ptr)
+                 (Ptr{Ptr{SignatureStruct}}, Ptr{Cvoid}), sig_ptr_ptr, repo)
     return GitSignature(sig_ptr_ptr[])
 end
diff --git a/stdlib/LibGit2/src/status.jl b/stdlib/LibGit2/src/status.jl
index c1cb2fb1c5a9c..c048e68c2b2bc 100644
--- a/stdlib/LibGit2/src/status.jl
+++ b/stdlib/LibGit2/src/status.jl
@@ -14,14 +14,14 @@ function GitStatus(repo::GitRepo; status_opts=StatusOptions())
     stat_ptr_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_status_list_new, libgit2), Cint,
                   (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Ptr{StatusOptions}),
-                  stat_ptr_ptr, repo.ptr, Ref(status_opts))
+                  stat_ptr_ptr, repo, Ref(status_opts))
     return GitStatus(repo, stat_ptr_ptr[])
 end
 
 function Base.length(status::GitStatus)
     ensure_initialized()
     return Int(ccall((:git_status_list_entrycount, libgit2), Csize_t,
-                      (Ptr{Ptr{Cvoid}},), status.ptr))
+                         (Ptr{Cvoid},), status))
 end
 
 function Base.getindex(status::GitStatus, i::Integer)
@@ -51,7 +51,7 @@ function status(repo::GitRepo, path::String)
     status_ptr = Ref{Cuint}(0)
     ret =  ccall((:git_status_file, libgit2), Cint,
                   (Ref{Cuint}, Ptr{Cvoid}, Cstring),
-                  status_ptr, repo.ptr, path)
+                  status_ptr, repo, path)
     (ret == Cint(Error.ENOTFOUND) || ret == Cint(Error.EAMBIGUOUS)) && return nothing
     return status_ptr[]
 end
diff --git a/stdlib/LibGit2/src/tag.jl b/stdlib/LibGit2/src/tag.jl
index 0e3d2b398a835..bbb0c97a484ec 100644
--- a/stdlib/LibGit2/src/tag.jl
+++ b/stdlib/LibGit2/src/tag.jl
@@ -9,7 +9,7 @@ function tag_list(repo::GitRepo)
     ensure_initialized()
     sa_ref = Ref(StrArrayStruct())
     @check ccall((:git_tag_list, libgit2), Cint,
-                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo.ptr)
+                 (Ptr{StrArrayStruct}, Ptr{Cvoid}), sa_ref, repo)
     res = convert(Vector{String}, sa_ref[])
     free(sa_ref)
     res
@@ -23,7 +23,7 @@ Remove the git tag `tag` from the repository `repo`.
 function tag_delete(repo::GitRepo, tag::AbstractString)
     ensure_initialized()
     @check ccall((:git_tag_delete, libgit2), Cint,
-                  (Ptr{Cvoid}, Cstring), repo.ptr, tag)
+                  (Ptr{Cvoid}, Cstring), repo, tag)
 end
 
 """
@@ -48,7 +48,7 @@ function tag_create(repo::GitRepo, tag::AbstractString, commit::Union{AbstractSt
             ensure_initialized()
             @check ccall((:git_tag_create, libgit2), Cint,
                  (Ptr{GitHash}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}, Ptr{SignatureStruct}, Cstring, Cint),
-                  oid_ptr, repo.ptr, tag, commit_obj.ptr, git_sig.ptr, msg, Cint(force))
+                  oid_ptr, repo, tag, commit_obj, git_sig, msg, Cint(force))
         end
     end
     return oid_ptr[]
diff --git a/stdlib/LibGit2/src/tree.jl b/stdlib/LibGit2/src/tree.jl
index 1aeeec96ea778..4c507aaba8e48 100644
--- a/stdlib/LibGit2/src/tree.jl
+++ b/stdlib/LibGit2/src/tree.jl
@@ -37,7 +37,7 @@ function treewalk(f, tree::GitTree, post::Bool = false)
         end, Cint, (Cstring, Ptr{Cvoid}, Ref{Vector{Any}}))
     err = ccall((:git_tree_walk, libgit2), Cint,
                 (Ptr{Cvoid}, Cint, Ptr{Cvoid}, Any),
-                tree.ptr, post, cbf, payload)
+                tree, post, cbf, payload)
     if err < 0
         err_class, _ = Error.last_error()
         if err_class != Error.Callback
@@ -58,8 +58,10 @@ Return the filename of the object on disk to which `te` refers.
 """
 function filename(te::GitTreeEntry)
     ensure_initialized()
-    str = ccall((:git_tree_entry_name, libgit2), Cstring, (Ptr{Cvoid},), te.ptr)
-    str != C_NULL && return unsafe_string(str)
+    GC.@preserve te begin
+        str = ccall((:git_tree_entry_name, libgit2), Cstring, (Ptr{Cvoid},), te.ptr)
+        str != C_NULL && return unsafe_string(str)
+    end
     return nothing
 end
 
@@ -70,7 +72,7 @@ Return the UNIX filemode of the object on disk to which `te` refers as an intege
 """
 function filemode(te::GitTreeEntry)
     ensure_initialized()
-    return ccall((:git_tree_entry_filemode, libgit2), Cint, (Ptr{Cvoid},), te.ptr)
+    return ccall((:git_tree_entry_filemode, libgit2), Cint, (Ptr{Cvoid},), te)
 end
 
 """
@@ -81,7 +83,7 @@ one of the types which [`objtype`](@ref) returns, e.g. a `GitTree` or `GitBlob`.
 """
 function entrytype(te::GitTreeEntry)
     ensure_initialized()
-    otype = ccall((:git_tree_entry_type, libgit2), Cint, (Ptr{Cvoid},), te.ptr)
+    otype = ccall((:git_tree_entry_type, libgit2), Cint, (Ptr{Cvoid},), te)
     return objtype(Consts.OBJECT(otype))
 end
 
@@ -101,7 +103,7 @@ end
 
 function count(tree::GitTree)
     ensure_initialized()
-    return ccall((:git_tree_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), tree.ptr)
+    return ccall((:git_tree_entrycount, libgit2), Csize_t, (Ptr{Cvoid},), tree)
 end
 
 function Base.getindex(tree::GitTree, i::Integer)
@@ -111,7 +113,7 @@ function Base.getindex(tree::GitTree, i::Integer)
     ensure_initialized()
     te_ptr = ccall((:git_tree_entry_byindex, libgit2),
                    Ptr{Cvoid},
-                   (Ptr{Cvoid}, Csize_t), tree.ptr, i-1)
+                   (Ptr{Cvoid}, Csize_t), tree, i-1)
     return GitTreeEntry(tree, te_ptr, false)
 end
 
diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl
index b0b463c69e2f1..7a4ad37a68ca5 100644
--- a/stdlib/LibGit2/src/types.jl
+++ b/stdlib/LibGit2/src/types.jl
@@ -1057,7 +1057,6 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[
                 return obj
             end
         end
-        @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr
     else
         @eval mutable struct $typ <: $sup
             owner::$owntyp
@@ -1072,17 +1071,17 @@ for (typ, owntyp, sup, cname) in Tuple{Symbol,Any,Symbol,Symbol}[
                 return obj
             end
         end
-        @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, x::$typ) = x.ptr
         if isa(owntyp, Expr) && owntyp.args[1] === :Union && owntyp.args[3] === :Nothing
             @eval begin
                 $typ(ptr::Ptr{Cvoid}, fin::Bool=true) = $typ(nothing, ptr, fin)
             end
         end
     end
+    @eval Base.unsafe_convert(::Type{Ptr{Cvoid}}, obj::$typ) = obj.ptr
     @eval function Base.close(obj::$typ)
         if obj.ptr != C_NULL
             ensure_initialized()
-            ccall(($(string(cname, :_free)), libgit2), Cvoid, (Ptr{Cvoid},), obj.ptr)
+            ccall(($(string(cname, :_free)), libgit2), Cvoid, (Ptr{Cvoid},), obj)
             obj.ptr = C_NULL
             if Threads.atomic_sub!(REFCOUNT, 1) == 1
                 # will the last finalizer please turn out the lights?
@@ -1116,10 +1115,11 @@ end
 function Base.close(obj::GitSignature)
     if obj.ptr != C_NULL
         ensure_initialized()
-        ccall((:git_signature_free, libgit2), Cvoid, (Ptr{SignatureStruct},), obj.ptr)
+        ccall((:git_signature_free, libgit2), Cvoid, (Ptr{SignatureStruct},), obj)
         obj.ptr = C_NULL
     end
 end
+Base.unsafe_convert(::Type{Ptr{SignatureStruct}}, obj::GitSignature) = obj.ptr
 
 # Structure has the same layout as SignatureStruct
 mutable struct Signature
diff --git a/stdlib/LibGit2/src/walker.jl b/stdlib/LibGit2/src/walker.jl
index e43687b014226..239009a014c1e 100644
--- a/stdlib/LibGit2/src/walker.jl
+++ b/stdlib/LibGit2/src/walker.jl
@@ -22,7 +22,7 @@ function GitRevWalker(repo::GitRepo)
     ensure_initialized()
     w_ptr = Ref{Ptr{Cvoid}}(C_NULL)
     @check ccall((:git_revwalk_new, libgit2), Cint,
-                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo.ptr)
+                  (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}), w_ptr, repo)
     return GitRevWalker(repo, w_ptr[])
 end
 
@@ -30,7 +30,7 @@ function Base.iterate(w::GitRevWalker, state=nothing)
     ensure_initialized()
     id_ptr = Ref(GitHash())
     err = ccall((:git_revwalk_next, libgit2), Cint,
-                (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w.ptr)
+                (Ptr{GitHash}, Ptr{Cvoid}), id_ptr, w)
     if err == Cint(Error.GIT_OK)
         return (id_ptr[], nothing)
     elseif err == Cint(Error.ITEROVER)
@@ -51,7 +51,7 @@ during the walk.
 """
 function push_head!(w::GitRevWalker)
     ensure_initialized()
-    @check ccall((:git_revwalk_push_head, libgit2), Cint, (Ptr{Cvoid},), w.ptr)
+    @check ccall((:git_revwalk_push_head, libgit2), Cint, (Ptr{Cvoid},), w)
     return w
 end
 
@@ -64,20 +64,20 @@ of that year as `cid` and then passing the resulting `w` to [`LibGit2.map`](@ref
 """
 function push!(w::GitRevWalker, cid::GitHash)
     ensure_initialized()
-    @check ccall((:git_revwalk_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w.ptr, Ref(cid))
+    @check ccall((:git_revwalk_push, libgit2), Cint, (Ptr{Cvoid}, Ptr{GitHash}), w, Ref(cid))
     return w
 end
 
 function push!(w::GitRevWalker, range::AbstractString)
     ensure_initialized()
-    @check ccall((:git_revwalk_push_range, libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w.ptr, range)
+    @check ccall((:git_revwalk_push_range, libgit2), Cint, (Ptr{Cvoid}, Ptr{UInt8}), w, range)
     return w
 end
 
 function Base.sort!(w::GitRevWalker; by::Cint = Consts.SORT_NONE, rev::Bool=false)
     ensure_initialized()
     rev && (by |= Consts.SORT_REVERSE)
-    @check ccall((:git_revwalk_sorting, libgit2), Cint, (Ptr{Cvoid}, Cint), w.ptr, by)
+    @check ccall((:git_revwalk_sorting, libgit2), Cint, (Ptr{Cvoid}, Cint), w, by)
     return w
 end
 
diff --git a/stdlib/LibGit2/test/libgit2-tests.jl b/stdlib/LibGit2/test/libgit2-tests.jl
index c5abca15ca719..72ca1019ff9e0 100644
--- a/stdlib/LibGit2/test/libgit2-tests.jl
+++ b/stdlib/LibGit2/test/libgit2-tests.jl
@@ -8,113 +8,11 @@ using Test
 using Random, Serialization, Sockets
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl"))
-import .Main.FakePTYs: with_fake_pty
-
-const timeout = 60
-
-function challenge_prompt(code::Expr, challenges)
-    input_code = tempname()
-    open(input_code, "w") do fp
-        serialize(fp, code)
-    end
-    output_file = tempname()
-    torun = """
-        import LibGit2
-        using Serialization
-        result = open($(repr(input_code))) do fp
-            eval(deserialize(fp))
-        end
-        open($(repr(output_file)), "w") do fp
-            serialize(fp, result)
-        end"""
-    cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun`
-    try
-        challenge_prompt(cmd, challenges)
-        return open(output_file, "r") do fp
-            deserialize(fp)
-        end
-    finally
-        isfile(output_file) && rm(output_file)
-        isfile(input_code) && rm(input_code)
-    end
-    return nothing
-end
+isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl"))
+using .Main.ChallengePrompts: challenge_prompt as basic_challenge_prompt
 
-function challenge_prompt(cmd::Cmd, challenges)
-    function format_output(output)
-        str = read(seekstart(output), String)
-        isempty(str) && return ""
-        return "Process output found:\n\"\"\"\n$str\n\"\"\""
-    end
-    out = IOBuffer()
-    with_fake_pty() do pts, ptm
-        p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default
-        Base.close_stdio(pts)
-
-        # Kill the process if it takes too long. Typically occurs when process is waiting
-        # for input.
-        timer = Channel{Symbol}(1)
-        watcher = @async begin
-            waited = 0
-            while waited < timeout && process_running(p)
-                sleep(1)
-                waited += 1
-            end
-
-            if process_running(p)
-                kill(p)
-                put!(timer, :timeout)
-            elseif success(p)
-                put!(timer, :success)
-            else
-                put!(timer, :failure)
-            end
-
-            # SIGKILL stubborn processes
-            if process_running(p)
-                sleep(3)
-                process_running(p) && kill(p, Base.SIGKILL)
-            end
-            wait(p)
-        end
-
-        wroteall = false
-        try
-            for (challenge, response) in challenges
-                write(out, readuntil(ptm, challenge, keep=true))
-                if !isopen(ptm)
-                    error("Could not locate challenge: \"$challenge\". ",
-                          format_output(out))
-                end
-                write(ptm, response)
-            end
-            wroteall = true
-
-            # Capture output from process until `pts` is closed
-            write(out, ptm)
-        catch ex
-            if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO)
-                # ignore EIO from `ptm` after `pts` dies
-                error("Process failed possibly waiting for a response. ",
-                      format_output(out))
-            end
-        end
-
-        status = fetch(timer)
-        close(ptm)
-        if status !== :success
-            if status === :timeout
-                error("Process timed out possibly waiting for a response. ",
-                      format_output(out))
-            else
-                error("Failed process. ", format_output(out), "\n", p)
-            end
-        end
-        wait(watcher)
-    end
-    nothing
-end
+challenge_prompt(code::Expr, challenges) = basic_challenge_prompt(code, challenges; pkgs=["LibGit2"])
+challenge_prompt(cmd::Cmd, challenges) = basic_challenge_prompt(cmd, challenges)
 
 const LIBGIT2_MIN_VER = v"1.0.0"
 const LIBGIT2_HELPER_PATH = joinpath(@__DIR__, "libgit2-helpers.jl")
diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml
index 7c61fdf89df70..fb03c6b996048 100644
--- a/stdlib/LibUV_jll/Project.toml
+++ b/stdlib/LibUV_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibUV_jll"
 uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
-version = "2.0.1+16"
+version = "2.0.1+18"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml
index eda312ec0735e..03ccfcd1449d8 100644
--- a/stdlib/LibUnwind_jll/Project.toml
+++ b/stdlib/LibUnwind_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "LibUnwind_jll"
 uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
-version = "1.8.1+0"
+version = "1.8.1+1"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
index bad0431755e98..15354603943c2 100644
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -174,7 +174,9 @@ public AbstractTriangular,
         isbanded,
         peakflops,
         symmetric,
-        symmetric_type
+        symmetric_type,
+        zeroslike,
+        matprod_dest
 
 const BlasFloat = Union{Float64,Float32,ComplexF64,ComplexF32}
 const BlasReal = Union{Float64,Float32}
@@ -394,17 +396,8 @@ julia> Y = zero(X);
 
 julia> ldiv!(Y, qr(A), X);
 
-julia> Y
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.051652892561983674
-  0.10020661157024757
-
-julia> A\\X
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.05165289256198333
-  0.10020661157024785
+julia> Y ≈ A\\X
+true
 ```
 """
 ldiv!(Y, A, B)
@@ -435,17 +428,8 @@ julia> Y = copy(X);
 
 julia> ldiv!(qr(A), X);
 
-julia> X
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.051652892561983674
-  0.10020661157024757
-
-julia> A\\Y
-3-element Vector{Float64}:
-  0.7128099173553719
- -0.05165289256198333
-  0.10020661157024785
+julia> X ≈ A\\Y
+true
 ```
 """
 ldiv!(A, B)
@@ -655,10 +639,6 @@ _evview(S::SymTridiagonal) = @view S.ev[begin:begin + length(S.dv) - 2]
 _zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
 _zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
 
-# convert to Vector, if necessary
-_makevector(x::Vector) = x
-_makevector(x::AbstractVector) = Vector(x)
-
 # append a zero element / drop the last element
 _pushzero(A) = (B = similar(A, length(A)+1); @inbounds B[begin:end-1] .= A; @inbounds B[end] = zero(eltype(B)); B)
 _droplast!(A) = deleteat!(A, lastindex(A))
@@ -677,7 +657,9 @@ matprod_dest(A::Diagonal, B::Diagonal, TS) = _matprod_dest_diag(B, TS)
 _matprod_dest_diag(A, TS) = similar(A, TS)
 function _matprod_dest_diag(A::SymTridiagonal, TS)
     n = size(A, 1)
-    Tridiagonal(similar(A, TS, n-1), similar(A, TS, n), similar(A, TS, n-1))
+    ev = similar(A, TS, max(0, n-1))
+    dv = similar(A, TS, n)
+    Tridiagonal(ev, dv, similar(ev))
 end
 
 # Special handling for adj/trans vec
@@ -847,9 +829,9 @@ function __init__()
     # https://github.com/xianyi/OpenBLAS/blob/c43ec53bdd00d9423fc609d7b7ecb35e7bf41b85/README.md#setting-the-number-of-threads-using-environment-variables
     if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "GOTO_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
         @static if Sys.isapple() && Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "aarch64"
-            BLAS.set_num_threads(max(1, Sys.CPU_THREADS))
+            BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint)))
         else
-            BLAS.set_num_threads(max(1, Sys.CPU_THREADS ÷ 2))
+            BLAS.set_num_threads(max(1, @ccall(jl_effective_threads()::Cint) ÷ 2))
         end
     end
 end
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
index daee587b82835..b722e49bb2c3d 100644
--- a/stdlib/LinearAlgebra/src/adjtrans.jl
+++ b/stdlib/LinearAlgebra/src/adjtrans.jl
@@ -302,6 +302,16 @@ function Base.showarg(io::IO, v::Transpose, toplevel)
     toplevel && print(io, " with eltype ", eltype(v))
     return nothing
 end
+function Base.show(io::IO, v::Adjoint{<:Real, <:AbstractVector})
+    print(io, "adjoint(")
+    show(io, parent(v))
+    print(io, ")")
+end
+function Base.show(io::IO, v::Transpose{<:Number, <:AbstractVector})
+    print(io, "transpose(")
+    show(io, parent(v))
+    print(io, ")")
+end
 
 # some aliases for internal convenience use
 const AdjOrTrans{T,S} = Union{Adjoint{T,S},Transpose{T,S}} where {T,S}
diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl
index 04d54911d88aa..381afd2f09a61 100644
--- a/stdlib/LinearAlgebra/src/bidiag.jl
+++ b/stdlib/LinearAlgebra/src/bidiag.jl
@@ -118,26 +118,25 @@ Bidiagonal(A::Bidiagonal) = A
 Bidiagonal{T}(A::Bidiagonal{T}) where {T} = A
 Bidiagonal{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A.dv, A.ev, A.uplo)
 
-bidiagzero(::Bidiagonal{T}, i, j) where {T} = zero(T)
-function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j)
-    Tel = eltype(eltype(A.dv))
+function diagzero(A::Bidiagonal{<:AbstractMatrix}, i, j)
+    Tel = eltype(A)
     if i < j && A.uplo == 'U' #= top right zeros =#
-        return zeros(Tel, size(A.ev[i], 1), size(A.ev[j-1], 2))
+        return zeroslike(Tel, axes(A.ev[i], 1), axes(A.ev[j-1], 2))
     elseif j < i && A.uplo == 'L' #= bottom left zeros =#
-        return zeros(Tel, size(A.ev[i-1], 1), size(A.ev[j], 2))
+        return zeroslike(Tel, axes(A.ev[i-1], 1), axes(A.ev[j], 2))
     else
-        return zeros(Tel, size(A.dv[i], 1), size(A.dv[j], 2))
+        return zeroslike(Tel, axes(A.dv[i], 1), axes(A.dv[j], 2))
     end
 end
 
+_offdiagind(uplo) = uplo == 'U' ? 1 : -1
+
 @inline function Base.isassigned(A::Bidiagonal, i::Int, j::Int)
     @boundscheck checkbounds(Bool, A, i, j) || return false
     if i == j
         return @inbounds isassigned(A.dv, i)
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds isassigned(A.ev, i)
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds isassigned(A.ev, j)
+    elseif i == j - _offdiagind(A.uplo)
+        return @inbounds isassigned(A.ev, A.uplo == 'U' ? i : j)
     else
         return true
     end
@@ -147,10 +146,8 @@ end
     @boundscheck checkbounds(A, i, j)
     if i == j
         return @inbounds Base.isstored(A.dv, i)
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds Base.isstored(A.ev, i)
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds Base.isstored(A.ev, j)
+    elseif i == j - _offdiagind(A.uplo)
+        return @inbounds Base.isstored(A.ev, A.uplo == 'U' ? i : j)
     else
         return false
     end
@@ -160,25 +157,22 @@ end
     @boundscheck checkbounds(A, i, j)
     if i == j
         return @inbounds A.dv[i]
-    elseif A.uplo == 'U' && (i == j - 1)
-        return @inbounds A.ev[i]
-    elseif A.uplo == 'L' && (i == j + 1)
-        return @inbounds A.ev[j]
+    elseif i == j - _offdiagind(A.uplo)
+        return @inbounds A.ev[A.uplo == 'U' ? i : j]
     else
-        return bidiagzero(A, i, j)
+        return diagzero(A, i, j)
     end
 end
 
 @inline function getindex(A::Bidiagonal{T}, b::BandIndex) where T
-    @boundscheck checkbounds(A, _cartinds(b))
+    @boundscheck checkbounds(A, b)
     if b.band == 0
         return @inbounds A.dv[b.index]
-    elseif A.uplo == 'U' && b.band == 1
-        return @inbounds A.ev[b.index]
-    elseif A.uplo == 'L' && b.band == -1
+    elseif b.band ∈ (-1,1) && b.band == _offdiagind(A.uplo)
+        # we explicitly compare the possible bands as b.band may be constant-propagated
         return @inbounds A.ev[b.index]
     else
-        return bidiagzero(A, Tuple(_cartinds(b))...)
+        return diagzero(A, Tuple(_cartinds(b))...)
     end
 end
 
@@ -186,15 +180,13 @@ end
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.dv[i] = x
-    elseif A.uplo == 'U' && (i == j - 1)
-        @inbounds A.ev[i] = x
-    elseif A.uplo == 'L' && (i == j + 1)
-        @inbounds A.ev[j] = x
+    elseif i == j - _offdiagind(A.uplo)
+        @inbounds A.ev[A.uplo == 'U' ? i : j] = x
     elseif !iszero(x)
         throw(ArgumentError(LazyString(lazy"cannot set entry ($i, $j) off the ",
-            istriu(A) ? "upper" : "lower", " bidiagonal band to a nonzero value ", x)))
+            A.uplo == 'U' ? "upper" : "lower", " bidiagonal band to a nonzero value ", x)))
     end
-    return x
+    return A
 end
 
 Base._reverse(A::Bidiagonal, dims) = reverse!(Matrix(A); dims)
@@ -202,11 +194,7 @@ Base._reverse(A::Bidiagonal, ::Colon) = Bidiagonal(reverse(A.dv), reverse(A.ev),
 
 ## structured matrix methods ##
 function Base.replace_in_print_matrix(A::Bidiagonal,i::Integer,j::Integer,s::AbstractString)
-    if A.uplo == 'U'
-        i==j || i==j-1 ? s : Base.replace_with_centered_mark(s)
-    else
-        i==j || i==j+1 ? s : Base.replace_with_centered_mark(s)
-    end
+    i==j || i==j-_offdiagind(A.uplo) ? s : Base.replace_with_centered_mark(s)
 end
 
 #Converting from Bidiagonal to dense Matrix
@@ -215,7 +203,7 @@ function Matrix{T}(A::Bidiagonal) where T
     if haszero(T) # optimized path for types with zero(T) defined
         size(B,1) > 1 && fill!(B, zero(T))
         copyto!(view(B, diagind(B)), A.dv)
-        copyto!(view(B, diagind(B, A.uplo == 'U' ? 1 : -1)), A.ev)
+        copyto!(view(B, diagind(B, _offdiagind(A.uplo))), A.ev)
     else
         copyto!(B, A)
     end
@@ -252,8 +240,8 @@ tr(B::Bidiagonal) = sum(B.dv)
 
 function kron(A::Diagonal, B::Bidiagonal)
     # `_droplast!` is only guaranteed to work with `Vector`
-    kdv = _makevector(kron(diag(A), B.dv))
-    kev = _droplast!(_makevector(kron(diag(A), _pushzero(B.ev))))
+    kdv = convert(Vector, kron(diag(A), B.dv))
+    kev = _droplast!(convert(Vector, kron(diag(A), _pushzero(B.ev))))
     Bidiagonal(kdv, kev, B.uplo)
 end
 
@@ -276,12 +264,13 @@ end
 ####################
 
 function show(io::IO, M::Bidiagonal)
-    # TODO: make this readable and one-line
-    summary(io, M)
-    print(io, ":\n diag:")
-    print_matrix(io, (M.dv)')
-    print(io, M.uplo == 'U' ? "\n super:" : "\n sub:")
-    print_matrix(io, (M.ev)')
+    print(io, "Bidiagonal(")
+    show(io, M.dv)
+    print(io, ", ")
+    show(io, M.ev)
+    print(io, ", ")
+    show(io, sym_uplo(M.uplo))
+    print(io, ")")
 end
 
 size(M::Bidiagonal) = (n = length(M.dv); (n, n))
@@ -298,7 +287,7 @@ adjoint(B::Bidiagonal{<:Number, <:Base.ReshapedArray{<:Number,1,<:Adjoint}}) =
 transpose(B::Bidiagonal{<:Number}) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? :L : :U)
 permutedims(B::Bidiagonal) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? 'L' : 'U')
 function permutedims(B::Bidiagonal, perm)
-    Base.checkdims_perm(B, B, perm)
+    Base.checkdims_perm(axes(B), axes(B), perm)
     NTuple{2}(perm) == (2, 1) ? permutedims(B) : B
 end
 function Base.copy(aB::Adjoint{<:Any,<:Bidiagonal})
@@ -310,18 +299,22 @@ function Base.copy(tB::Transpose{<:Any,<:Bidiagonal})
     return Bidiagonal(map(x -> copy.(transpose.(x)), (B.dv, B.ev))..., B.uplo == 'U' ? :L : :U)
 end
 
+@noinline function throw_zeroband_error(A)
+    uplo = A.uplo
+    zeroband = uplo == 'U' ? "lower" : "upper"
+    throw(ArgumentError(LazyString("cannot set the ",
+        zeroband, " bidiagonal band to a nonzero value for uplo=:", uplo)))
+end
+
 # copyto! for matching axes
 function _copyto_banded!(A::Bidiagonal, B::Bidiagonal)
     A.dv .= B.dv
     if A.uplo == B.uplo
         A.ev .= B.ev
     elseif iszero(B.ev) # diagonal source
-        A.ev .= zero.(A.ev)
+        A.ev .= B.ev
     else
-        zeroband = istriu(A) ? "lower" : "upper"
-        uplo = A.uplo
-        throw(ArgumentError(LazyString("cannot set the ",
-            zeroband, " bidiagonal band to a nonzero value for uplo=:", uplo)))
+        throw_zeroband_error(A)
     end
     return A
 end
@@ -408,7 +401,7 @@ function triu!(M::Bidiagonal{T}, k::Integer=0) where T
     return M
 end
 
-function diag(M::Bidiagonal{T}, n::Integer=0) where T
+function diag(M::Bidiagonal, n::Integer=0)
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of n
     if n == 0
@@ -416,7 +409,11 @@ function diag(M::Bidiagonal{T}, n::Integer=0) where T
     elseif (n == 1 && M.uplo == 'U') ||  (n == -1 && M.uplo == 'L')
         return copyto!(similar(M.ev, length(M.ev)), M.ev)
     elseif -size(M,1) <= n <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-abs(n)), zero(T))
+        v = similar(M.dv, size(M,1)-abs(n))
+        for i in eachindex(v)
+            v[i] = M[BandIndex(n,i)]
+        end
+        return v
     else
         throw(ArgumentError(LazyString(lazy"requested diagonal, $n, must be at least $(-size(M, 1)) ",
             lazy"and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
@@ -444,6 +441,32 @@ end
 -(A::Bidiagonal)=Bidiagonal(-A.dv,-A.ev,A.uplo)
 *(A::Bidiagonal, B::Number) = Bidiagonal(A.dv*B, A.ev*B, A.uplo)
 *(B::Number, A::Bidiagonal) = Bidiagonal(B*A.dv, B*A.ev, A.uplo)
+function rmul!(B::Bidiagonal, x::Number)
+    if size(B,1) > 1
+        isupper = B.uplo == 'U'
+        row, col = 1 + isupper, 1 + !isupper
+        # ensure that zeros are preserved on scaling
+        y = B[row,col] * x
+        iszero(y) || throw(ArgumentError(LazyString(lazy"cannot set index ($row, $col) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
+    end
+    @. B.dv *= x
+    @. B.ev *= x
+    return B
+end
+function lmul!(x::Number, B::Bidiagonal)
+    if size(B,1) > 1
+        isupper = B.uplo == 'U'
+        row, col = 1 + isupper, 1 + !isupper
+        # ensure that zeros are preserved on scaling
+        y = x * B[row,col]
+        iszero(y) || throw(ArgumentError(LazyString(lazy"cannot set index ($row, $col) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
+    end
+    @. B.dv = x * B.dv
+    @. B.ev = x * B.ev
+    return B
+end
 /(A::Bidiagonal, B::Number) = Bidiagonal(A.dv/B, A.ev/B, A.uplo)
 \(B::Number, A::Bidiagonal) = Bidiagonal(B\A.dv, B\A.ev, A.uplo)
 
@@ -472,7 +495,7 @@ const BiTri = Union{Bidiagonal,Tridiagonal}
 
 # B .= A * B
 function lmul!(A::Bidiagonal, B::AbstractVecOrMat)
-    _muldiag_size_check(A, B)
+    _muldiag_size_check(size(A), size(B))
     (; dv, ev) = A
     if A.uplo == 'U'
         for k in axes(B,2)
@@ -493,7 +516,7 @@ function lmul!(A::Bidiagonal, B::AbstractVecOrMat)
 end
 # B .= D * B
 function lmul!(D::Diagonal, B::Bidiagonal)
-    _muldiag_size_check(D, B)
+    _muldiag_size_check(size(D), size(B))
     (; dv, ev) = B
     isL = B.uplo == 'L'
     dv[1] = D.diag[1] * dv[1]
@@ -505,7 +528,7 @@ function lmul!(D::Diagonal, B::Bidiagonal)
 end
 # B .= B * A
 function rmul!(B::AbstractMatrix, A::Bidiagonal)
-    _muldiag_size_check(A, B)
+    _muldiag_size_check(size(A), size(B))
     (; dv, ev) = A
     if A.uplo == 'U'
         for k in reverse(axes(dv,1)[2:end])
@@ -530,7 +553,7 @@ function rmul!(B::AbstractMatrix, A::Bidiagonal)
 end
 # B .= B * D
 function rmul!(B::Bidiagonal, D::Diagonal)
-    _muldiag_size_check(B, D)
+    _muldiag_size_check(size(B), size(D))
     (; dv, ev) = B
     isU = B.uplo == 'U'
     dv[1] *= D.diag[1]
@@ -541,27 +564,31 @@ function rmul!(B::Bidiagonal, D::Diagonal)
     return B
 end
 
-function check_A_mul_B!_sizes(C, A, B)
-    mA, nA = size(A)
-    mB, nB = size(B)
-    mC, nC = size(C)
+@noinline function check_A_mul_B!_sizes((mC, nC)::NTuple{2,Integer}, (mA, nA)::NTuple{2,Integer}, (mB, nB)::NTuple{2,Integer})
+    # check for matching sizes in one column of B and C
+    check_A_mul_B!_sizes((mC,), (mA, nA), (mB,))
+    # ensure that the number of columns in B and C match
+    if nB != nC
+        throw(DimensionMismatch(lazy"second dimension of output C, $nC, and second dimension of B, $nB, must match"))
+    end
+end
+@noinline function check_A_mul_B!_sizes((mC,)::Tuple{Integer}, (mA, nA)::NTuple{2,Integer}, (mB,)::Tuple{Integer})
     if mA != mC
         throw(DimensionMismatch(lazy"first dimension of A, $mA, and first dimension of output C, $mC, must match"))
     elseif nA != mB
         throw(DimensionMismatch(lazy"second dimension of A, $nA, and first dimension of B, $mB, must match"))
-    elseif nB != nC
-        throw(DimensionMismatch(lazy"second dimension of output C, $nC, and second dimension of B, $nB, must match"))
     end
 end
 
 # function to get the internally stored vectors for Bidiagonal and [Sym]Tridiagonal
 # to avoid allocations in _mul! below (#24324, #24578)
 _diag(A::Tridiagonal, k) = k == -1 ? A.dl : k == 0 ? A.d : A.du
-_diag(A::SymTridiagonal, k) = k == 0 ? A.dv : A.ev
+_diag(A::SymTridiagonal{<:Number}, k) = k == 0 ? A.dv : A.ev
+_diag(A::SymTridiagonal, k) = k == 0 ? view(A, diagind(A, IndexStyle(A))) : view(A, diagind(A, 1, IndexStyle(A)))
 function _diag(A::Bidiagonal, k)
     if k == 0
         return A.dv
-    elseif (A.uplo == 'L' && k == -1) || (A.uplo == 'U' && k == 1)
+    elseif k == _offdiagind(A.uplo)
         return A.ev
     else
         return diag(A, k)
@@ -573,14 +600,49 @@ _mul!(C::AbstractMatrix, A::BiTriSym, B::TriSym, _add::MulAddMul) =
 _mul!(C::AbstractMatrix, A::BiTriSym, B::Bidiagonal, _add::MulAddMul) =
     _bibimul!(C, A, B, _add)
 function _bibimul!(C, A, B, _add)
-    check_A_mul_B!_sizes(C, A, B)
+    require_one_based_indexing(C)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
-    n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
+    iszero(n) && return C
     # We use `_rmul_or_fill!` instead of `_modify!` here since using
     # `_modify!` in the following loop will not update the
     # off-diagonal elements for non-zero beta.
     _rmul_or_fill!(C, _add.beta)
     iszero(_add.alpha) && return C
+    if n <= 3
+        # naive multiplication
+        for I in CartesianIndices(C)
+            C[I] += _add(sum(A[I[1], k] * B[k, I[2]] for k in axes(A,2)))
+        end
+        return C
+    end
+    @inbounds begin
+        # first column of C
+        C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2,1])
+        C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1])
+        C[3,1] += _add(A[3,2]*B[2,1])
+        # second column of C
+        C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2])
+        C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2])
+        C[3,2] += _add(A[3,2]*B[2,2] + A[3,3]*B[3,2])
+        C[4,2] += _add(A[4,3]*B[3,2])
+    end # inbounds
+    # middle columns
+    __bibimul!(C, A, B, _add)
+    @inbounds begin
+        C[n-3,n-1] += _add(A[n-3,n-2]*B[n-2,n-1])
+        C[n-2,n-1] += _add(A[n-2,n-2]*B[n-2,n-1] + A[n-2,n-1]*B[n-1,n-1])
+        C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1])
+        C[n,  n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1])
+        # last column of C
+        C[n-2,  n] += _add(A[n-2,n-1]*B[n-1,n])
+        C[n-1,  n] += _add(A[n-1,n-1]*B[n-1,n  ] + A[n-1,n]*B[n,n  ])
+        C[n,    n] += _add(A[n,n-1]*B[n-1,n  ] + A[n,n]*B[n,n  ])
+    end # inbounds
+    C
+end
+function __bibimul!(C, A, B, _add)
+    n = size(A,1)
     Al = _diag(A, -1)
     Ad = _diag(A, 0)
     Au = _diag(A, 1)
@@ -588,53 +650,206 @@ function _bibimul!(C, A, B, _add)
     Bd = _diag(B, 0)
     Bu = _diag(B, 1)
     @inbounds begin
-        # first row of C
-        C[1,1] += _add(A[1,1]*B[1,1] + A[1, 2]*B[2, 1])
-        C[1,2] += _add(A[1,1]*B[1,2] + A[1,2]*B[2,2])
-        C[1,3] += _add(A[1,2]*B[2,3])
-        # second row of C
-        C[2,1] += _add(A[2,1]*B[1,1] + A[2,2]*B[2,1])
-        C[2,2] += _add(A[2,1]*B[1,2] + A[2,2]*B[2,2] + A[2,3]*B[3,2])
-        C[2,3] += _add(A[2,2]*B[2,3] + A[2,3]*B[3,3])
-        C[2,4] += _add(A[2,3]*B[3,4])
         for j in 3:n-2
-            Ajj₋1   = Al[j-1]
-            Ajj     = Ad[j]
+            Aj₋2j₋1 = Au[j-2]
+            Aj₋1j   = Au[j-1]
             Ajj₊1   = Au[j]
-            Bj₋1j₋2 = Bl[j-2]
-            Bj₋1j₋1 = Bd[j-1]
+            Aj₋1j₋1 = Ad[j-1]
+            Ajj     = Ad[j]
+            Aj₊1j₊1 = Ad[j+1]
+            Ajj₋1   = Al[j-1]
+            Aj₊1j   = Al[j]
+            Aj₊2j₊1 = Al[j+1]
             Bj₋1j   = Bu[j-1]
-            Bjj₋1   = Bl[j-1]
             Bjj     = Bd[j]
-            Bjj₊1   = Bu[j]
             Bj₊1j   = Bl[j]
-            Bj₊1j₊1 = Bd[j+1]
-            Bj₊1j₊2 = Bu[j+1]
-            C[j,j-2]  += _add( Ajj₋1*Bj₋1j₋2)
-            C[j, j-1] += _add(Ajj₋1*Bj₋1j₋1 + Ajj*Bjj₋1)
-            C[j, j  ] += _add(Ajj₋1*Bj₋1j   + Ajj*Bjj       + Ajj₊1*Bj₊1j)
-            C[j, j+1] += _add(Ajj  *Bjj₊1   + Ajj₊1*Bj₊1j₊1)
-            C[j, j+2] += _add(Ajj₊1*Bj₊1j₊2)
+
+            C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j)
+            C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj)
+            C[j,   j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj + Ajj₊1*Bj₊1j)
+            C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j)
+            C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j)
         end
-        # row before last of C
-        C[n-1,n-3] += _add(A[n-1,n-2]*B[n-2,n-3])
-        C[n-1,n-2] += _add(A[n-1,n-1]*B[n-1,n-2] + A[n-1,n-2]*B[n-2,n-2])
-        C[n-1,n-1] += _add(A[n-1,n-2]*B[n-2,n-1] + A[n-1,n-1]*B[n-1,n-1] + A[n-1,n]*B[n,n-1])
-        C[n-1,n  ] += _add(A[n-1,n-1]*B[n-1,n  ] + A[n-1,  n]*B[n  ,n  ])
-        # last row of C
-        C[n,n-2] += _add(A[n,n-1]*B[n-1,n-2])
-        C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1] + A[n,n]*B[n,n-1])
-        C[n,n  ] += _add(A[n,n-1]*B[n-1,n  ] + A[n,n]*B[n,n  ])
-    end # inbounds
+    end
+    C
+end
+function __bibimul!(C, A, B::Bidiagonal, _add)
+    n = size(A,1)
+    Al = _diag(A, -1)
+    Ad = _diag(A, 0)
+    Au = _diag(A, 1)
+    Bd = _diag(B, 0)
+    if B.uplo == 'U'
+        Bu = _diag(B, 1)
+        @inbounds begin
+            for j in 3:n-2
+                Aj₋2j₋1 = Au[j-2]
+                Aj₋1j   = Au[j-1]
+                Aj₋1j₋1 = Ad[j-1]
+                Ajj     = Ad[j]
+                Ajj₋1   = Al[j-1]
+                Aj₊1j   = Al[j]
+                Bj₋1j   = Bu[j-1]
+                Bjj     = Bd[j]
+
+                C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j)
+                C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj)
+                C[j,   j] += _add(Ajj₋1*Bj₋1j + Ajj*Bjj)
+                C[j+1, j] += _add(Aj₊1j*Bjj)
+            end
+        end
+    else # B.uplo == 'L'
+        Bl = _diag(B, -1)
+        @inbounds begin
+            for j in 3:n-2
+                Aj₋1j   = Au[j-1]
+                Ajj₊1   = Au[j]
+                Ajj     = Ad[j]
+                Aj₊1j₊1 = Ad[j+1]
+                Aj₊1j   = Al[j]
+                Aj₊2j₊1 = Al[j+1]
+                Bjj     = Bd[j]
+                Bj₊1j   = Bl[j]
+
+                C[j-1, j] += _add(Aj₋1j*Bjj)
+                C[j,   j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j)
+                C[j+1, j] += _add(Aj₊1j*Bjj + Aj₊1j₊1*Bj₊1j)
+                C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j)
+            end
+        end
+    end
+    C
+end
+function __bibimul!(C, A::Bidiagonal, B, _add)
+    n = size(A,1)
+    Bl = _diag(B, -1)
+    Bd = _diag(B, 0)
+    Bu = _diag(B, 1)
+    Ad = _diag(A, 0)
+    if A.uplo == 'U'
+        Au = _diag(A, 1)
+        @inbounds begin
+            for j in 3:n-2
+                Aj₋2j₋1 = Au[j-2]
+                Aj₋1j   = Au[j-1]
+                Ajj₊1   = Au[j]
+                Aj₋1j₋1 = Ad[j-1]
+                Ajj     = Ad[j]
+                Aj₊1j₊1 = Ad[j+1]
+                Bj₋1j   = Bu[j-1]
+                Bjj     = Bd[j]
+                Bj₊1j   = Bl[j]
+
+                C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j)
+                C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj)
+                C[j,   j] += _add(Ajj*Bjj       + Ajj₊1*Bj₊1j)
+                C[j+1, j] += _add(Aj₊1j₊1*Bj₊1j)
+            end
+        end
+    else # A.uplo == 'L'
+        Al = _diag(A, -1)
+        @inbounds begin
+            for j in 3:n-2
+                Aj₋1j₋1 = Ad[j-1]
+                Ajj     = Ad[j]
+                Aj₊1j₊1 = Ad[j+1]
+                Ajj₋1   = Al[j-1]
+                Aj₊1j   = Al[j]
+                Aj₊2j₊1 = Al[j+1]
+                Bj₋1j   = Bu[j-1]
+                Bjj     = Bd[j]
+                Bj₊1j   = Bl[j]
+
+                C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j)
+                C[j,   j] += _add(Ajj₋1*Bj₋1j   + Ajj*Bjj)
+                C[j+1, j] += _add(Aj₊1j*Bjj   + Aj₊1j₊1*Bj₊1j)
+                C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j)
+            end
+        end
+    end
+    C
+end
+function __bibimul!(C, A::Bidiagonal, B::Bidiagonal, _add)
+    n = size(A,1)
+    Ad = _diag(A, 0)
+    Bd = _diag(B, 0)
+    if A.uplo == 'U' && B.uplo == 'U'
+        Au = _diag(A, 1)
+        Bu = _diag(B, 1)
+        @inbounds begin
+            for j in 3:n-2
+                Aj₋2j₋1 = Au[j-2]
+                Aj₋1j   = Au[j-1]
+                Aj₋1j₋1 = Ad[j-1]
+                Ajj     = Ad[j]
+                Bj₋1j   = Bu[j-1]
+                Bjj     = Bd[j]
+
+                C[j-2, j] += _add(Aj₋2j₋1*Bj₋1j)
+                C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j + Aj₋1j*Bjj)
+                C[j,   j] += _add(Ajj*Bjj)
+            end
+        end
+    elseif A.uplo == 'U' && B.uplo == 'L'
+        Au = _diag(A, 1)
+        Bl = _diag(B, -1)
+        @inbounds begin
+            for j in 3:n-2
+                Aj₋1j   = Au[j-1]
+                Ajj₊1   = Au[j]
+                Ajj     = Ad[j]
+                Aj₊1j₊1 = Ad[j+1]
+                Bjj     = Bd[j]
+                Bj₊1j   = Bl[j]
+
+                C[j-1, j] += _add(Aj₋1j*Bjj)
+                C[j,   j] += _add(Ajj*Bjj + Ajj₊1*Bj₊1j)
+                C[j+1, j] += _add(Aj₊1j₊1*Bj₊1j)
+            end
+        end
+    elseif A.uplo == 'L' && B.uplo == 'U'
+        Al = _diag(A, -1)
+        Bu = _diag(B, 1)
+        @inbounds begin
+            for j in 3:n-2
+                Aj₋1j₋1 = Ad[j-1]
+                Ajj     = Ad[j]
+                Ajj₋1   = Al[j-1]
+                Aj₊1j   = Al[j]
+                Bj₋1j   = Bu[j-1]
+                Bjj     = Bd[j]
+
+                C[j-1, j] += _add(Aj₋1j₋1*Bj₋1j)
+                C[j,   j] += _add(Ajj₋1*Bj₋1j   + Ajj*Bjj)
+                C[j+1, j] += _add(Aj₊1j*Bjj)
+            end
+        end
+    else # A.uplo == 'L' && B.uplo == 'L'
+        Al = _diag(A, -1)
+        Bl = _diag(B, -1)
+        @inbounds begin
+            for j in 3:n-2
+                Ajj     = Ad[j]
+                Aj₊1j₊1 = Ad[j+1]
+                Aj₊1j   = Al[j]
+                Aj₊2j₊1 = Al[j+1]
+                Bjj     = Bd[j]
+                Bj₊1j   = Bl[j]
+
+                C[j,   j] += _add(Ajj*Bjj)
+                C[j+1, j] += _add(Aj₊1j*Bjj   + Aj₊1j₊1*Bj₊1j)
+                C[j+2, j] += _add(Aj₊2j₊1*Bj₊1j)
+            end
+        end
+    end
     C
 end
 
 function _mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, _add::MulAddMul)
     require_one_based_indexing(C)
-    check_A_mul_B!_sizes(C, A, B)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
     iszero(n) && return C
-    n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
     _rmul_or_fill!(C, _add.beta)  # see the same use above
     iszero(_add.alpha) && return C
     Al = _diag(A, -1)
@@ -643,41 +858,152 @@ function _mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, _add::MulAddMul)
     Bd = B.diag
     @inbounds begin
         # first row of C
-        C[1,1] += _add(A[1,1]*B[1,1])
-        C[1,2] += _add(A[1,2]*B[2,2])
+        for j in 1:min(2, n)
+            C[1,j] += _add(A[1,j]*B[j,j])
+        end
         # second row of C
-        C[2,1] += _add(A[2,1]*B[1,1])
-        C[2,2] += _add(A[2,2]*B[2,2])
-        C[2,3] += _add(A[2,3]*B[3,3])
+        if n > 1
+            for j in 1:min(3, n)
+                C[2,j] += _add(A[2,j]*B[j,j])
+            end
+        end
         for j in 3:n-2
             C[j, j-1] += _add(Al[j-1]*Bd[j-1])
             C[j, j  ] += _add(Ad[j  ]*Bd[j  ])
             C[j, j+1] += _add(Au[j  ]*Bd[j+1])
         end
-        # row before last of C
-        C[n-1,n-2] += _add(A[n-1,n-2]*B[n-2,n-2])
-        C[n-1,n-1] += _add(A[n-1,n-1]*B[n-1,n-1])
-        C[n-1,n  ] += _add(A[n-1,  n]*B[n  ,n  ])
+        if n > 3
+            # row before last of C
+            for j in n-2:n
+                C[n-1,j] += _add(A[n-1,j]*B[j,j])
+            end
+        end
         # last row of C
-        C[n,n-1] += _add(A[n,n-1]*B[n-1,n-1])
-        C[n,n  ] += _add(A[n,n  ]*B[n,  n  ])
+        if n > 2
+            for j in n-1:n
+                C[n,j] += _add(A[n,j]*B[j,j])
+            end
+        end
     end # inbounds
     C
 end
 
+function _mul!(C::AbstractMatrix, A::Bidiagonal, B::Diagonal, _add::MulAddMul)
+    require_one_based_indexing(C)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    iszero(n) && return C
+    _rmul_or_fill!(C, _add.beta)  # see the same use above
+    iszero(_add.alpha) && return C
+    (; dv, ev) = A
+    Bd = B.diag
+    rowshift = A.uplo == 'U' ? -1 : 1
+    evshift = Int(A.uplo == 'U')
+    @inbounds begin
+        # first row of C
+        C[1,1] += _add(dv[1]*Bd[1])
+        if n > 1
+            if A.uplo == 'L'
+                C[2,1] += _add(ev[1]*Bd[1])
+            end
+            for col in 2:n-1
+                C[col+rowshift, col] += _add(ev[col - evshift]*Bd[col])
+                C[col, col] += _add(dv[col]*Bd[col])
+            end
+            if A.uplo == 'U'
+                C[n-1,n] += _add(ev[n-1]*Bd[n])
+            end
+            C[n, n] += _add(dv[n]*Bd[n])
+        end
+    end # inbounds
+    C
+end
+
+function _mul!(C::Bidiagonal, A::Bidiagonal, B::Diagonal, _add::MulAddMul)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    iszero(n) && return C
+    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
+    Adv, Aev = A.dv, A.ev
+    Cdv, Cev = C.dv, C.ev
+    Bd = B.diag
+    shift = Int(A.uplo == 'U')
+    if C.uplo == A.uplo
+        @inbounds begin
+            _modify!(_add, Adv[1]*Bd[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Aev, Cev)
+                _modify!(_add, Aev[j]*Bd[j+shift], Cev, j)
+                _modify!(_add, Adv[j+1]*Bd[j+1], Cdv, j+1)
+            end
+        end # inbounds
+    else
+        @inbounds begin
+            _modify!(_add, Adv[1]*Bd[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Aev, Cev)
+                _modify!(_add, Adv[j+1]*Bd[j+1], Cdv, j+1)
+                # this branch will error unless the value is zero
+                _modify!(_add, Aev[j]*Bd[j+shift], C, (j+1-shift, j+shift))
+                # zeros of the correct type
+                _modify!(_add, A[j+shift, j+1-shift]*Bd[j+1-shift], Cev, j)
+            end
+        end
+    end
+    C
+end
+
 function _mul!(C::AbstractVecOrMat, A::BiTriSym, B::AbstractVecOrMat, _add::MulAddMul)
     require_one_based_indexing(C, B)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     nA = size(A,1)
     nB = size(B,2)
-    if !(size(C,1) == size(B,1) == nA)
-        throw(DimensionMismatch(lazy"A has first dimension $nA, B has $(size(B,1)), C has $(size(C,1)) but all must match"))
+    (iszero(nA) || iszero(nB)) && return C
+    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
+    if nA <= 3
+        # naive multiplication
+        for I in CartesianIndices(C)
+            col = Base.tail(Tuple(I))
+            _modify!(_add, sum(A[I[1], k] * B[k, col...] for k in axes(A,2)), C, I)
+        end
+        return C
     end
-    if size(C,2) != nB
-        throw(DimensionMismatch(lazy"A has second dimension $nA, B has $(size(B,2)), C has $(size(C,2)) but all must match"))
+    _mul_bitrisym!(C, A, B, _add)
+end
+function _mul_bitrisym!(C::AbstractVecOrMat, A::Bidiagonal, B::AbstractVecOrMat, _add::MulAddMul)
+    nA = size(A,1)
+    nB = size(B,2)
+    d = A.dv
+    if A.uplo == 'U'
+        u = A.ev
+        @inbounds begin
+            for j = 1:nB
+                b₀, b₊ = B[1, j], B[2, j]
+                _modify!(_add, d[1]*b₀ + u[1]*b₊, C, (1, j))
+                for i = 2:nA - 1
+                    b₀, b₊ = b₊, B[i + 1, j]
+                    _modify!(_add, d[i]*b₀ + u[i]*b₊, C, (i, j))
+                end
+                _modify!(_add, d[nA]*b₊, C, (nA, j))
+            end
+        end
+    else
+        l = A.ev
+        @inbounds begin
+            for j = 1:nB
+                b₀, b₊ = B[1, j], B[2, j]
+                _modify!(_add, d[1]*b₀, C, (1, j))
+                for i = 2:nA - 1
+                    b₋, b₀, b₊ = b₀, b₊, B[i + 1, j]
+                    _modify!(_add, l[i - 1]*b₋ + d[i]*b₀, C, (i, j))
+                end
+                _modify!(_add, l[nA - 1]*b₀ + d[nA]*b₊, C, (nA, j))
+            end
+        end
     end
-    iszero(nA) && return C
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    nA <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
+    C
+end
+function _mul_bitrisym!(C::AbstractVecOrMat, A::TriSym, B::AbstractVecOrMat, _add::MulAddMul)
+    nA = size(A,1)
+    nB = size(B,2)
     l = _diag(A, -1)
     d = _diag(A, 0)
     u = _diag(A, 1)
@@ -697,12 +1023,13 @@ end
 
 function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::TriSym, _add::MulAddMul)
     require_one_based_indexing(C, A)
-    check_A_mul_B!_sizes(C, A, B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
     m = size(B,2)
-    if n <= 3 || m <= 1
-        return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
+    (iszero(_add.alpha) || iszero(m)) && return _rmul_or_fill!(C, _add.beta)
+    if m == 1
+        B11 = B[1,1]
+        return mul!(C, A, B11, _add.alpha, _add.beta)
     end
     Bl = _diag(B, -1)
     Bd = _diag(B, 0)
@@ -732,24 +1059,22 @@ end
 
 function _mul!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal, _add::MulAddMul)
     require_one_based_indexing(C, A)
-    check_A_mul_B!_sizes(C, A, B)
-    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
-    if size(A, 1) <= 3 || size(B, 2) <= 1
-        return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
-    end
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     m, n = size(A)
+    (iszero(m) || iszero(n)) && return C
+    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
     @inbounds if B.uplo == 'U'
+        for j in n:-1:2, i in 1:m
+            _modify!(_add, A[i,j] * B.dv[j] + A[i,j-1] * B.ev[j-1], C, (i, j))
+        end
         for i in 1:m
-            for j in n:-1:2
-                _modify!(_add, A[i,j] * B.dv[j] + A[i,j-1] * B.ev[j-1], C, (i, j))
-            end
             _modify!(_add, A[i,1] * B.dv[1], C, (i, 1))
         end
     else # uplo == 'L'
+        for j in 1:n-1, i in 1:m
+            _modify!(_add, A[i,j] * B.dv[j] + A[i,j+1] * B.ev[j], C, (i, j))
+        end
         for i in 1:m
-            for j in 1:n-1
-                _modify!(_add, A[i,j] * B.dv[j] + A[i,j+1] * B.ev[j], C, (i, j))
-            end
             _modify!(_add, A[i,n] * B.dv[n], C, (i, n))
         end
     end
@@ -762,11 +1087,20 @@ _mul!(C::AbstractMatrix, A::Diagonal, B::TriSym, _add::MulAddMul) =
     _dibimul!(C, A, B, _add)
 function _dibimul!(C, A, B, _add)
     require_one_based_indexing(C)
-    check_A_mul_B!_sizes(C, A, B)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
     n = size(A,1)
-    n <= 3 && return mul!(C, Array(A), Array(B), _add.alpha, _add.beta)
-    _rmul_or_fill!(C, _add.beta)  # see the same use above
+    iszero(n) && return C
+    # ensure that we fill off-band elements in the destination
+    _rmul_or_fill!(C, _add.beta)
     iszero(_add.alpha) && return C
+    if n <= 3
+        # For simplicity, use a naive multiplication for small matrices
+        # that loops over all elements.
+        for I in CartesianIndices(C)
+            C[I] += _add(A.diag[I[1]] * B[I[1], I[2]])
+        end
+        return C
+    end
     Ad = A.diag
     Bl = _diag(B, -1)
     Bd = _diag(B, 0)
@@ -795,6 +1129,69 @@ function _dibimul!(C, A, B, _add)
     end # inbounds
     C
 end
+function _dibimul!(C::AbstractMatrix, A::Diagonal, B::Bidiagonal, _add)
+    require_one_based_indexing(C)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    iszero(n) && return C
+    # ensure that we fill off-band elements in the destination
+    _rmul_or_fill!(C, _add.beta)
+    iszero(_add.alpha) && return C
+    Ad = A.diag
+    Bdv, Bev = B.dv, B.ev
+    rowshift = B.uplo == 'U' ? -1 : 1
+    evshift = Int(B.uplo == 'U')
+    @inbounds begin
+        # first row of C
+        C[1,1] += _add(Ad[1]*Bdv[1])
+        if n > 1
+            if B.uplo == 'L'
+                C[2,1] += _add(Ad[2]*Bev[1])
+            end
+            for col in 2:n-1
+                evrow = col+rowshift
+                C[evrow, col] += _add(Ad[evrow]*Bev[col - evshift])
+                C[col, col] += _add(Ad[col]*Bdv[col])
+            end
+            if B.uplo == 'U'
+                C[n-1,n] += _add(Ad[n-1]*Bev[n-1])
+            end
+            C[n, n] += _add(Ad[n]*Bdv[n])
+        end
+    end # inbounds
+    C
+end
+function _dibimul!(C::Bidiagonal, A::Diagonal, B::Bidiagonal, _add)
+    check_A_mul_B!_sizes(size(C), size(A), size(B))
+    n = size(A,1)
+    n == 0 && return C
+    iszero(_add.alpha) && return _rmul_or_fill!(C, _add.beta)
+    Ad = A.diag
+    Bdv, Bev = B.dv, B.ev
+    Cdv, Cev = C.dv, C.ev
+    shift = Int(B.uplo == 'L')
+    if C.uplo == B.uplo
+        @inbounds begin
+            _modify!(_add, Ad[1]*Bdv[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Bev, Cev)
+                _modify!(_add, Ad[j+shift]*Bev[j], Cev, j)
+                _modify!(_add, Ad[j+1]*Bdv[j+1], Cdv, j+1)
+            end
+        end # inbounds
+    else
+        @inbounds begin
+            _modify!(_add, Ad[1]*Bdv[1], Cdv, 1)
+            for j in eachindex(IndexLinear(), Bev, Cev)
+                _modify!(_add, Ad[j+1]*Bdv[j+1], Cdv, j+1)
+                # this branch will error unless the value is zero
+                _modify!(_add, Ad[j+shift]*Bev[j], C, (j+shift, j+1-shift))
+                # zeros of the correct type
+                _modify!(_add, Ad[j+1-shift]*B[j+1-shift,j+shift], Cev, j)
+            end
+        end
+    end
+    C
+end
 
 function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
     TS = promote_op(matprod, eltype(A), eltype(B))
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index 413b7866c5444..3c15630091162 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -159,7 +159,7 @@ function check()
     interface = USE_BLAS64 ? :ilp64 : :lp64
     if !any(lib.interface == interface for lib in config.loaded_libs)
         interfacestr = uppercase(string(interface))
-        @error("No loaded BLAS libraries were built with $interfacestr support.")
+        println(Core.stderr, "No loaded BLAS libraries were built with $interfacestr support.")
         exit(1)
     end
 end
diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl
index 5a73c656abe33..a44f1a1c99094 100644
--- a/stdlib/LinearAlgebra/src/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/src/bunchkaufman.jl
@@ -127,6 +127,9 @@ function bunchkaufman!(A::StridedMatrix{<:BlasFloat}, rook::Bool = false; check:
     end
 end
 
+bkcopy_oftype(A, S) = eigencopy_oftype(A, S)
+bkcopy_oftype(A::Symmetric{<:Complex}, S) = Symmetric(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
+
 """
     bunchkaufman(A, rook::Bool=false; check = true) -> S::BunchKaufman
 
@@ -206,7 +209,7 @@ julia> S.L*S.D*S.L' - A[S.p, S.p]
 ```
 """
 bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =
-    bunchkaufman!(eigencopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
+    bunchkaufman!(bkcopy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
 
 BunchKaufman{T}(B::BunchKaufman) where {T} =
     BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info)
@@ -1540,7 +1543,7 @@ function bunchkaufman(A::AbstractMatrix{TS},
     rook::Bool = false;
     check::Bool = true
     ) where TS <: ClosedScalar{TR} where TR <: ClosedReal
-    return bunchkaufman!(eigencopy_oftype(A, TS), rook; check)
+    return bunchkaufman!(bkcopy_oftype(A, TS), rook; check)
 end
 
 function bunchkaufman(A::AbstractMatrix{TS},
@@ -1562,15 +1565,15 @@ function bunchkaufman(A::AbstractMatrix{TS},
     # We promote input to BigInt to avoid overflow problems
     if TA == Nothing
         if TS <: Integer
-            M = Rational{BigInt}.(eigencopy_oftype(A, TS))
+            M = Rational{BigInt}.(bkcopy_oftype(A, TS))
         else
-            M = Complex{Rational{BigInt}}.(eigencopy_oftype(A, TS))
+            M = Complex{Rational{BigInt}}.(bkcopy_oftype(A, TS))
         end
     else
         if TS <: Integer
-            M = TA(Rational{BigInt}.(eigencopy_oftype(A, TS)), Symbol(A.uplo))
+            M = TA(Rational{BigInt}.(bkcopy_oftype(A, TS)), Symbol(A.uplo))
         else
-            M = TA(Complex{Rational{BigInt}}.(eigencopy_oftype(A, TS)),
+            M = TA(Complex{Rational{BigInt}}.(bkcopy_oftype(A, TS)),
                 Symbol(A.uplo))
         end
     end
diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl
index cb7c6b94d4ca6..545d92ec1704d 100644
--- a/stdlib/LinearAlgebra/src/cholesky.jl
+++ b/stdlib/LinearAlgebra/src/cholesky.jl
@@ -551,6 +551,9 @@ end
 # allow packages like SparseArrays.jl to hook into here and redirect to out-of-place `cholesky`
 _cholesky(A::AbstractMatrix, args...; kwargs...) = cholesky!(A, args...; kwargs...)
 
+# allow cholesky of cholesky
+cholesky(A::Cholesky) = A
+
 ## With pivoting
 """
     cholesky(A, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted
diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl
index c441e8e658ac8..aacc5479bfa9d 100644
--- a/stdlib/LinearAlgebra/src/dense.jl
+++ b/stdlib/LinearAlgebra/src/dense.jl
@@ -110,7 +110,7 @@ norm2(x::Union{Array{T},StridedVector{T}}) where {T<:BlasFloat} =
 # Conservative assessment of types that have zero(T) defined for themselves
 haszero(::Type) = false
 haszero(::Type{T}) where {T<:Number} = isconcretetype(T)
-@propagate_inbounds _zero(M::AbstractArray{T}, i, j) where {T} = haszero(T) ? zero(T) : zero(M[i,j])
+@propagate_inbounds _zero(M::AbstractArray{T}, inds...) where {T} = haszero(T) ? zero(T) : zero(M[inds...])
 
 """
     triu!(M, k::Integer)
@@ -370,13 +370,10 @@ julia> diagm([1,2,3])
 diagm(v::AbstractVector) = diagm(0 => v)
 diagm(m::Integer, n::Integer, v::AbstractVector) = diagm(m, n, 0 => v)
 
-function tr(A::Matrix{T}) where T
-    n = checksquare(A)
-    t = zero(T)
-    @inbounds @simd for i in 1:n
-        t += A[i,i]
-    end
-    t
+function tr(A::StridedMatrix{T}) where T
+    checksquare(A)
+    isempty(A) && return zero(T)
+    reduce(+, (A[i] for i in diagind(A, IndexStyle(A))))
 end
 
 _kronsize(A::AbstractMatrix, B::AbstractMatrix) = map(*, size(A), size(B))
@@ -565,9 +562,6 @@ function (^)(A::AbstractMatrix{T}, p::Real) where T
     isinteger(p) && return integerpow(A, p)
 
     # If possible, use diagonalization
-    if issymmetric(A)
-        return (Symmetric(A)^p)
-    end
     if ishermitian(A)
         return (Hermitian(A)^p)
     end
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
index b3826a2aa7f82..0c93024f33a9a 100644
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ b/stdlib/LinearAlgebra/src/diagonal.jl
@@ -185,8 +185,27 @@ end
     end
     r
 end
-diagzero(::Diagonal{T}, i, j) where {T} = zero(T)
-diagzero(D::Diagonal{<:AbstractMatrix{T}}, i, j) where {T} = zeros(T, size(D.diag[i], 1), size(D.diag[j], 2))
+"""
+    diagzero(A::AbstractMatrix, i, j)
+
+Return the appropriate zero element `A[i, j]` corresponding to a banded matrix `A`.
+"""
+diagzero(A::AbstractMatrix, i, j) = zero(eltype(A))
+diagzero(D::Diagonal{M}, i, j) where {M<:AbstractMatrix} =
+    zeroslike(M, axes(D.diag[i], 1), axes(D.diag[j], 2))
+# dispatching on the axes permits specializing on the axis types to return something other than an Array
+zeroslike(M::Type, ax::Vararg{Union{AbstractUnitRange, Integer}}) = zeroslike(M, ax)
+"""
+    zeroslike(::Type{M}, ax::Tuple{AbstractUnitRange, Vararg{AbstractUnitRange}}) where {M<:AbstractMatrix}
+    zeroslike(::Type{M}, sz::Tuple{Integer, Vararg{Integer}}) where {M<:AbstractMatrix}
+
+Return an appropriate zero-ed array similar to `M`, with either the axes `ax` or the size `sz`.
+This will be used as a structural zero element of a matrix-valued banded matrix.
+By default, `zeroslike` falls back to using the size along each axis to construct the array.
+"""
+zeroslike(M::Type, ax::Tuple{AbstractUnitRange, Vararg{AbstractUnitRange}}) = zeroslike(M, map(length, ax))
+zeroslike(M::Type, sz::Tuple{Integer, Vararg{Integer}}) = zeros(M, sz)
+zeroslike(::Type{M}, sz::Tuple{Integer, Vararg{Integer}}) where {M<:AbstractMatrix} = zeros(eltype(M), sz)
 
 @inline function getindex(D::Diagonal, b::BandIndex)
     @boundscheck checkbounds(D, b)
@@ -205,7 +224,7 @@ function setindex!(D::Diagonal, v, i::Int, j::Int)
     elseif !iszero(v)
         throw(ArgumentError(lazy"cannot set off-diagonal entry ($i, $j) to a nonzero value ($v)"))
     end
-    return v
+    return D
 end
 
 
@@ -213,6 +232,11 @@ end
 function Base.replace_in_print_matrix(A::Diagonal,i::Integer,j::Integer,s::AbstractString)
     i==j ? s : Base.replace_with_centered_mark(s)
 end
+function Base.show(io::IO, A::Diagonal)
+    print(io, "Diagonal(")
+    show(io, A.diag)
+    print(io, ")")
+end
 
 parent(D::Diagonal) = D.diag
 
@@ -222,7 +246,6 @@ Base._reverse(A::Diagonal, dims) = reverse!(Matrix(A); dims)
 Base._reverse(A::Diagonal, ::Colon) = Diagonal(reverse(A.diag))
 Base._reverse!(A::Diagonal, ::Colon) = (reverse!(A.diag); A)
 
-ishermitian(D::Diagonal{<:Real}) = true
 ishermitian(D::Diagonal{<:Number}) = isreal(D.diag)
 ishermitian(D::Diagonal) = all(ishermitian, D.diag)
 issymmetric(D::Diagonal{<:Number}) = true
@@ -267,23 +290,28 @@ end
 (+)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag + Db.diag)
 (-)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag - Db.diag)
 
-for f in (:+, :-)
-    @eval function $f(D::Diagonal, S::Symmetric)
-        return Symmetric($f(D, S.data), sym_uplo(S.uplo))
-    end
-    @eval function $f(S::Symmetric, D::Diagonal)
-        return Symmetric($f(S.data, D), sym_uplo(S.uplo))
-    end
-    @eval function $f(D::Diagonal{<:Real}, H::Hermitian)
-        return Hermitian($f(D, H.data), sym_uplo(H.uplo))
+(*)(x::Number, D::Diagonal) = Diagonal(x * D.diag)
+(*)(D::Diagonal, x::Number) = Diagonal(D.diag * x)
+function lmul!(x::Number, D::Diagonal)
+    if size(D,1) > 1
+        # ensure that zeros are preserved on scaling
+        y = D[2,1] * x
+        iszero(y) || throw(ArgumentError(LazyString("cannot set index (2, 1) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
     end
-    @eval function $f(H::Hermitian, D::Diagonal{<:Real})
-        return Hermitian($f(H.data, D), sym_uplo(H.uplo))
+    @. D.diag = x * D.diag
+    return D
+end
+function rmul!(D::Diagonal, x::Number)
+    if size(D,1) > 1
+        # ensure that zeros are preserved on scaling
+        y = x * D[2,1]
+        iszero(y) || throw(ArgumentError(LazyString("cannot set index (2, 1) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
     end
+    @. D.diag *= x
+    return D
 end
-
-(*)(x::Number, D::Diagonal) = Diagonal(x * D.diag)
-(*)(D::Diagonal, x::Number) = Diagonal(D.diag * x)
 (/)(D::Diagonal, x::Number) = Diagonal(D.diag / x)
 (\)(x::Number, D::Diagonal) = Diagonal(x \ D.diag)
 (^)(D::Diagonal, a::Number) = Diagonal(D.diag .^ a)
@@ -293,42 +321,39 @@ Base.literal_pow(::typeof(^), D::Diagonal, valp::Val) =
     Diagonal(Base.literal_pow.(^, D.diag, valp)) # for speed
 Base.literal_pow(::typeof(^), D::Diagonal, ::Val{-1}) = inv(D) # for disambiguation
 
-function _muldiag_size_check(A, B)
-    nA = size(A, 2)
-    mB = size(B, 1)
-    @noinline throw_dimerr(::AbstractMatrix, nA, mB) = throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match first dimension of B, $mB"))
-    @noinline throw_dimerr(::AbstractVector, nA, mB) = throw(DimensionMismatch(lazy"second dimension of D, $nA, does not match length of V, $mB"))
-    nA == mB || throw_dimerr(B, nA, mB)
+function _muldiag_size_check(szA::NTuple{2,Integer}, szB::Tuple{Integer,Vararg{Integer}})
+    nA = szA[2]
+    mB = szB[1]
+    @noinline throw_dimerr(szB::NTuple{2}, nA, mB) = throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match first dimension of B, $mB"))
+    @noinline throw_dimerr(szB::NTuple{1}, nA, mB) = throw(DimensionMismatch(lazy"second dimension of D, $nA, does not match length of V, $mB"))
+    nA == mB || throw_dimerr(szB, nA, mB)
     return nothing
 end
 # the output matrix should have the same size as the non-diagonal input matrix or vector
 @noinline throw_dimerr(szC, szA) = throw(DimensionMismatch(lazy"output matrix has size: $szC, but should have size $szA"))
-_size_check_out(C, ::Diagonal, A) = _size_check_out(C, A)
-_size_check_out(C, A, ::Diagonal) = _size_check_out(C, A)
-_size_check_out(C, A::Diagonal, ::Diagonal) = _size_check_out(C, A)
-function _size_check_out(C, A)
-    szA = size(A)
-    szC = size(C)
-    szA == szC || throw_dimerr(szC, szA)
-    return nothing
+function _size_check_out(szC::NTuple{2}, szA::NTuple{2}, szB::NTuple{2})
+    (szC[1] == szA[1] && szC[2] == szB[2]) || throw_dimerr(szC, (szA[1], szB[2]))
 end
-function _muldiag_size_check(C, A, B)
-    _muldiag_size_check(A, B)
-    _size_check_out(C, A, B)
+function _size_check_out(szC::NTuple{1}, szA::NTuple{2}, szB::NTuple{1})
+    szC[1] == szA[1] || throw_dimerr(szC, (szA[1],))
+end
+function _muldiag_size_check(szC::Tuple{Vararg{Integer}}, szA::Tuple{Vararg{Integer}}, szB::Tuple{Vararg{Integer}})
+    _muldiag_size_check(szA, szB)
+   _size_check_out(szC, szA, szB)
 end
 
 function (*)(Da::Diagonal, Db::Diagonal)
-    _muldiag_size_check(Da, Db)
+    _muldiag_size_check(size(Da), size(Db))
     return Diagonal(Da.diag .* Db.diag)
 end
 
 function (*)(D::Diagonal, V::AbstractVector)
-    _muldiag_size_check(D, V)
+    _muldiag_size_check(size(D), size(V))
     return D.diag .* V
 end
 
 function rmul!(A::AbstractMatrix, D::Diagonal)
-    _muldiag_size_check(A, D)
+    _muldiag_size_check(size(A), size(D))
     for I in CartesianIndices(A)
         row, col = Tuple(I)
         @inbounds A[row, col] *= D.diag[col]
@@ -337,7 +362,7 @@ function rmul!(A::AbstractMatrix, D::Diagonal)
 end
 # T .= T * D
 function rmul!(T::Tridiagonal, D::Diagonal)
-    _muldiag_size_check(T, D)
+    _muldiag_size_check(size(T), size(D))
     (; dl, d, du) = T
     d[1] *= D.diag[1]
     for i in axes(dl,1)
@@ -349,7 +374,7 @@ function rmul!(T::Tridiagonal, D::Diagonal)
 end
 
 function lmul!(D::Diagonal, B::AbstractVecOrMat)
-    _muldiag_size_check(D, B)
+    _muldiag_size_check(size(D), size(B))
     for I in CartesianIndices(B)
         row = I[1]
         @inbounds B[I] = D.diag[row] * B[I]
@@ -360,7 +385,7 @@ end
 # in-place multiplication with a diagonal
 # T .= D * T
 function lmul!(D::Diagonal, T::Tridiagonal)
-    _muldiag_size_check(D, T)
+    _muldiag_size_check(size(D), size(T))
     (; dl, d, du) = T
     d[1] = D.diag[1] * d[1]
     for i in axes(dl,1)
@@ -452,7 +477,7 @@ function __muldiag!(out, D1::Diagonal, D2::Diagonal, _add::MulAddMul{ais1,bis0})
 end
 
 function _mul_diag!(out, A, B, _add)
-    _muldiag_size_check(out, A, B)
+    _muldiag_size_check(size(out), size(A), size(B))
     __muldiag!(out, A, B, _add)
     return out
 end
@@ -469,14 +494,14 @@ _mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, _add) =
     _mul_diag!(C, Da, Db, _add)
 
 function (*)(Da::Diagonal, A::AbstractMatrix, Db::Diagonal)
-    _muldiag_size_check(Da, A)
-    _muldiag_size_check(A, Db)
+    _muldiag_size_check(size(Da), size(A))
+    _muldiag_size_check(size(A), size(Db))
     return broadcast(*, Da.diag, A, permutedims(Db.diag))
 end
 
 function (*)(Da::Diagonal, Db::Diagonal, Dc::Diagonal)
-    _muldiag_size_check(Da, Db)
-    _muldiag_size_check(Db, Dc)
+    _muldiag_size_check(size(Da), size(Db))
+    _muldiag_size_check(size(Db), size(Dc))
     return Diagonal(Da.diag .* Db.diag .* Dc.diag)
 end
 
@@ -684,9 +709,9 @@ function kron(A::Diagonal, B::SymTridiagonal)
 end
 function kron(A::Diagonal, B::Tridiagonal)
     # `_droplast!` is only guaranteed to work with `Vector`
-    kd = _makevector(kron(diag(A), B.d))
-    kdl = _droplast!(_makevector(kron(diag(A), _pushzero(B.dl))))
-    kdu = _droplast!(_makevector(kron(diag(A), _pushzero(B.du))))
+    kd = convert(Vector, kron(diag(A), B.d))
+    kdl = _droplast!(convert(Vector, kron(diag(A), _pushzero(B.dl))))
+    kdu = _droplast!(convert(Vector, kron(diag(A), _pushzero(B.du))))
     Tridiagonal(kdl, kd, kdu)
 end
 
@@ -743,18 +768,22 @@ adjoint(D::Diagonal{<:Number}) = Diagonal(vec(adjoint(D.diag)))
 adjoint(D::Diagonal{<:Number,<:Base.ReshapedArray{<:Number,1,<:Adjoint}}) = Diagonal(adjoint(parent(D.diag)))
 adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag))
 permutedims(D::Diagonal) = D
-permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D)
+permutedims(D::Diagonal, perm) = (Base.checkdims_perm(axes(D), axes(D), perm); D)
 
-function diag(D::Diagonal{T}, k::Integer=0) where T
+function diag(D::Diagonal, k::Integer=0)
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of k
     if k == 0
         return copyto!(similar(D.diag, length(D.diag)), D.diag)
     elseif -size(D,1) <= k <= size(D,1)
-        return fill!(similar(D.diag, size(D,1)-abs(k)), zero(T))
+        v = similar(D.diag, size(D,1)-abs(k))
+        for i in eachindex(v)
+            v[i] = D[BandIndex(k, i)]
+        end
+        return v
     else
-        throw(ArgumentError(string("requested diagonal, $k, must be at least $(-size(D, 1)) ",
-            "and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix")))
+        throw(ArgumentError(LazyString(lazy"requested diagonal, $k, must be at least $(-size(D, 1)) ",
+            lazy"and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix")))
     end
 end
 tr(D::Diagonal) = sum(tr, D.diag)
diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl
index bdd11dc822ef3..e5f23b4981616 100644
--- a/stdlib/LinearAlgebra/src/generic.jl
+++ b/stdlib/LinearAlgebra/src/generic.jl
@@ -1676,7 +1676,7 @@ end
 """
     reflectorApply!(x, τ, A)
 
-Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - τ*[1; x] * [1; x]')*A`.
+Multiplies `A` in-place by a Householder reflection on the left. It is equivalent to `A .= (I - conj(τ)*[1; x[2:end]]*[1; x[2:end]]')*A`.
 """
 @inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat)
     require_one_based_indexing(x)
@@ -2014,20 +2014,12 @@ function copytrito!(B::AbstractMatrix, A::AbstractMatrix, uplo::AbstractChar)
     m1,n1 = size(B)
     A = Base.unalias(B, A)
     if uplo == 'U'
-        if n < m
-            (m1 < n || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($n,$n)"))
-        else
-            (m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
-        end
+        LAPACK.lacpy_size_check((m1, n1), (n < m ? n : m, n))
         for j in 1:n, i in 1:min(j,m)
             @inbounds B[i,j] = A[i,j]
         end
     else # uplo == 'L'
-        if m < n
-            (m1 < m || n1 < m) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$m)"))
-        else
-            (m1 < m || n1 < n) && throw(DimensionMismatch(lazy"B of size ($m1,$n1) should have at least size ($m,$n)"))
-        end
+        LAPACK.lacpy_size_check((m1, n1), (m, m < n ? m : n))
         for j in 1:n, i in j:m
             @inbounds B[i,j] = A[i,j]
         end
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
index bbaca3c878293..524e57711ce3a 100644
--- a/stdlib/LinearAlgebra/src/hessenberg.jl
+++ b/stdlib/LinearAlgebra/src/hessenberg.jl
@@ -446,7 +446,7 @@ This is useful because multiple shifted solves `(F + μ*I) \\ b`
 Iterating the decomposition produces the factors `F.Q, F.H, F.μ`.
 
 # Examples
-```jldoctest
+```julia-repl
 julia> A = [4. 9. 7.; 4. 4. 1.; 4. 3. 2.]
 3×3 Matrix{Float64}:
  4.0  9.0  7.0
diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl
index e9cfacfcd0cfd..97dff0031329b 100644
--- a/stdlib/LinearAlgebra/src/lapack.jl
+++ b/stdlib/LinearAlgebra/src/lapack.jl
@@ -5329,7 +5329,7 @@ for (syev, syevr, syevd, sygvd, elty) in
         #       INTEGER            INFO, LDA, LWORK, N
         # *     .. Array Arguments ..
         #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
-        function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
             @chkvalidparam 1 jobz ('N', 'V')
             chkuplo(uplo)
@@ -5429,7 +5429,7 @@ for (syev, syevr, syevd, sygvd, elty) in
         # *     .. Array Arguments ..
         #       INTEGER            IWORK( * )
         #       DOUBLE PRECISION   A( LDA, * ), W( * ), WORK( * )
-        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
             @chkvalidparam 1 jobz ('N', 'V')
             chkstride1(A)
@@ -5526,7 +5526,7 @@ for (syev, syevr, syevd, sygvd, elty, relty) in
         # *     .. Array Arguments ..
         #       DOUBLE PRECISION   RWORK( * ), W( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syev!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
             @chkvalidparam 1 jobz ('N', 'V')
             chkstride1(A)
@@ -5639,7 +5639,7 @@ for (syev, syevr, syevd, sygvd, elty, relty) in
         #       INTEGER            IWORK( * )
         #       DOUBLE PRECISION   RWORK( * )
         #       COMPLEX*16         A( LDA, * ), WORK( * )
-        function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
+        Base.@constprop :aggressive function syevd!(jobz::AbstractChar, uplo::AbstractChar, A::AbstractMatrix{$elty})
             require_one_based_indexing(A)
             @chkvalidparam 1 jobz ('N', 'V')
             chkstride1(A)
@@ -6452,7 +6452,7 @@ for (gees, gges, gges3, elty) in
                     resize!(work, lwork)
                 end
             end
-            A, vs, iszero(wi) ? wr : complex.(wr, wi)
+            iszero(wi) ? (A, vs, wr) : (A, vs, complex.(wr, wi))
         end
 
         # *     .. Scalar Arguments ..
@@ -6833,7 +6833,7 @@ for (trexc, trsen, tgsen, elty) in
                     resize!(iwork, liwork)
                 end
             end
-            T, Q, iszero(wi) ? wr : complex.(wr, wi), s[], sep[]
+            iszero(wi) ? (T, Q, wr, s[], sep[]) : (T, Q, complex.(wr, wi), s[], sep[])
         end
         trsen!(select::AbstractVector{BlasInt}, T::AbstractMatrix{$elty}, Q::AbstractMatrix{$elty}) =
             trsen!('N', 'V', select, T, Q)
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
index 02b4411566290..81d10f930c8c5 100644
--- a/stdlib/LinearAlgebra/src/lbt.jl
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -17,7 +17,7 @@ end
 macro get_warn(map, key)
     return quote
         if !haskey($(esc(map)), $(esc(key)))
-            @warn(string("[LBT] Unknown key into ", $(string(map)), ": ", $(esc(key)), ", defaulting to :unknown"))
+            println(Core.stderr, string("Warning: [LBT] Unknown key into ", $(string(map)), ": ", $(esc(key)), ", defaulting to :unknown"))
             # All the unknown values share a common value: `-1`
             $(esc(map))[$(esc(LBT_INTERFACE_UNKNOWN))]
         else
@@ -132,7 +132,7 @@ struct LBTConfig
             if str_ptr != C_NULL
                 push!(exported_symbols, unsafe_string(str_ptr))
             else
-                @error("NULL string in lbt_config.exported_symbols[$(sym_idx)]")
+                println(Core.stderr, "Error: NULL string in lbt_config.exported_symbols[$(sym_idx)]")
             end
         end
 
@@ -284,6 +284,25 @@ function lbt_find_backing_library(symbol_name, interface::Symbol;
 end
 
 
+"""
+    lbt_forwarded_funcs(config::LBTConfig, lib::LBTLibraryInfo)
+
+Given a backing library `lib`, return the list of all functions that are
+forwarded to that library, as a vector of `String`s.
+"""
+function lbt_forwarded_funcs(config::LBTConfig, lib::LBTLibraryInfo)
+    forwarded_funcs = String[]
+    for (symbol_idx, symbol) in enumerate(config.exported_symbols)
+        forward_byte_offset = div(symbol_idx - 1, 8)
+        forward_byte_mask = 1 << mod(symbol_idx - 1, 8)
+        if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00
+            push!(forwarded_funcs, symbol)
+        end
+    end
+    return forwarded_funcs
+end
+
+
 ## NOTE: Manually setting forwards is referred to as the 'footgun API'.  It allows truly
 ## bizarre and complex setups to be created.  If you run into strange errors while using
 ## it, the first thing you should ask yourself is whether you've set things up properly.
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
index 412d375d3e842..b70f7d47b28dd 100644
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ b/stdlib/LinearAlgebra/src/matmul.jl
@@ -61,9 +61,10 @@ function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S}
 end
 
 # these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case):
-(*)(a::AbstractVector, tB::TransposeAbsMat) = reshape(a, length(a), 1) * tB
-(*)(a::AbstractVector, adjB::AdjointAbsMat) = reshape(a, length(a), 1) * adjB
-(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B
+function (*)(a::AbstractVector, B::AbstractMatrix)
+    require_one_based_indexing(a)
+    reshape(a, length(a), 1) * B
+end
 
 # Add a level of indirection and specialize _mul! to avoid ambiguities in mul!
 @inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
@@ -122,7 +123,15 @@ function (*)(A::AbstractMatrix, B::AbstractMatrix)
     mul!(matprod_dest(A, B, TS), A, B)
 end
 
-matprod_dest(A, B, TS) = similar(B, TS, (size(A, 1), size(B, 2)))
+"""
+    matprod_dest(A, B, T)
+
+Return an appropriate `AbstractArray` with element type `T` that may be used to store the result of `A * B`.
+
+!!! compat
+    This function requires at least Julia 1.11
+"""
+matprod_dest(A, B, T) = similar(B, T, (size(A, 1), size(B, 2)))
 
 # optimization for dispatching to BLAS, e.g. *(::Matrix{Float32}, ::Matrix{Float64})
 # but avoiding the case *(::Matrix{<:BlasComplex}, ::Matrix{<:BlasReal})
diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl
index 0f81a07e12b08..9a89e58372d08 100644
--- a/stdlib/LinearAlgebra/src/qr.jl
+++ b/stdlib/LinearAlgebra/src/qr.jl
@@ -417,7 +417,7 @@ true
     `qr` returns multiple types because LAPACK uses several representations
     that minimize the memory storage requirements of products of Householder
     elementary reflectors, so that the `Q` and `R` matrices can be stored
-    compactly rather as two separate dense matrices.
+    compactly rather than two separate dense matrices.
 """
 function qr(A::AbstractMatrix{T}, arg...; kwargs...) where T
     require_one_based_indexing(A)
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
index 9633594574055..32a5476842933 100644
--- a/stdlib/LinearAlgebra/src/special.jl
+++ b/stdlib/LinearAlgebra/src/special.jl
@@ -21,16 +21,19 @@ function Tridiagonal(A::Bidiagonal)
     Tridiagonal(A.uplo == 'U' ? z : A.ev, A.dv, A.uplo == 'U' ? A.ev : z)
 end
 
+_diagview(S::SymTridiagonal{<:Number}) = S.dv
+_diagview(S::SymTridiagonal) = view(S, diagind(S, IndexStyle(S)))
+
 # conversions from SymTridiagonal to other special matrix types
-Diagonal(A::SymTridiagonal) = Diagonal(A.dv)
+Diagonal(A::SymTridiagonal) = Diagonal(_diagview(A))
 
 # These can fail when ev has the same length as dv
 # TODO: Revisit when a good solution for #42477 is found
-Bidiagonal(A::SymTridiagonal) =
+Bidiagonal(A::SymTridiagonal{<:Number}) =
     iszero(A.ev) ? Bidiagonal(A.dv, A.ev, :U) :
         throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
-Tridiagonal(A::SymTridiagonal) =
-    Tridiagonal(copy(A.ev), A.dv, A.ev)
+Tridiagonal(A::SymTridiagonal{<:Number}) =
+    Tridiagonal(A.ev, A.dv, A.ev)
 
 # conversions from Tridiagonal to other special matrix types
 Diagonal(A::Tridiagonal) = Diagonal(A.d)
@@ -109,6 +112,8 @@ for op in (:+, :-)
     end
 end
 
+(*)(Da::Diagonal, A::BandedMatrix, Db::Diagonal) = _tri_matmul(Da, A, Db)
+
 # disambiguation between triangular and banded matrices, banded ones "dominate"
 _mul!(C::AbstractMatrix, A::AbstractTriangular, B::BandedMatrix, alpha::Number, beta::Number) =
     @stable_muladdmul _mul!(C, A, B, MulAddMul(alpha, beta))
@@ -163,26 +168,45 @@ function (-)(A::Diagonal, B::Bidiagonal)
     Bidiagonal(newdv, typeof(newdv)(-B.ev), B.uplo)
 end
 
+# Return a SymTridiagonal if the elements of `newdv` are
+# statically known to be symmetric. Return a Tridiagonal otherwise
+function _symtri_or_tri(dl, d, du)
+    new_du = oftype(d, du)
+    new_dl = oftype(d, dl)
+    if symmetric_type(eltype(d)) == eltype(d)
+        SymTridiagonal(d, new_du)
+    else
+        Tridiagonal(new_dl, d, new_du)
+    end
+end
+
 @commutative function (+)(A::Diagonal, B::SymTridiagonal)
-    newdv = A.diag + B.dv
-    SymTridiagonal(A.diag + B.dv, typeof(newdv)(B.ev))
+    newdv = A.diag + _diagview(B)
+    _symtri_or_tri(_evview_transposed(B), newdv, _evview(B))
 end
 
 function (-)(A::Diagonal, B::SymTridiagonal)
-    newdv = A.diag - B.dv
-    SymTridiagonal(newdv, typeof(newdv)(-B.ev))
+    newdv = A.diag - _diagview(B)
+    _symtri_or_tri(-_evview_transposed(B), newdv, -_evview(B))
 end
 
 function (-)(A::SymTridiagonal, B::Diagonal)
-    newdv = A.dv - B.diag
-    SymTridiagonal(newdv, typeof(newdv)(A.ev))
+    newdv = _diagview(A) - B.diag
+    _symtri_or_tri(_evview_transposed(A), newdv, _evview(A))
 end
 
 # this set doesn't have the aforementioned problem
-
-@commutative (+)(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B))
--(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-_evview(B), A.d-B.dv, A.du-_evview(B))
--(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)-B.dl, A.dv-B.d, _evview(A)-B.du)
+_evview_transposed(S::SymTridiagonal{<:Number}) = _evview(S)
+_evview_transposed(S::SymTridiagonal) = transpose.(_evview(S))
+@commutative function (+)(A::Tridiagonal, B::SymTridiagonal)
+    Tridiagonal(A.dl+_evview_transposed(B), A.d+_diagview(B), A.du+_evview(B))
+end
+function -(A::Tridiagonal, B::SymTridiagonal)
+    Tridiagonal(A.dl-_evview_transposed(B), A.d-_diagview(B), A.du-_evview(B))
+end
+function -(A::SymTridiagonal, B::Tridiagonal)
+    Tridiagonal(_evview_transposed(A)-B.dl, _diagview(A)-B.d, _evview(A)-B.du)
+end
 
 @commutative function (+)(A::Diagonal, B::Tridiagonal)
     newdv = A.diag + B.d
@@ -215,18 +239,18 @@ function (-)(A::Tridiagonal, B::Bidiagonal)
 end
 
 @commutative function (+)(A::Bidiagonal, B::SymTridiagonal)
-    newdv = A.dv + B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview(B)), A.dv+B.dv, A.ev+_evview(B)) : (A.ev+_evview(B), A.dv+B.dv, typeof(newdv)(_evview(B))))...)
+    newdv = A.dv + _diagview(B)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview_transposed(B)), newdv, A.ev+_evview(B)) : (A.ev+_evview_transposed(B), newdv, typeof(newdv)(_evview(B))))...)
 end
 
 function (-)(A::Bidiagonal, B::SymTridiagonal)
-    newdv = A.dv - B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview(B), newdv, typeof(newdv)(-_evview(B))))...)
+    newdv = A.dv - _diagview(B)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview_transposed(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview_transposed(B), newdv, typeof(newdv)(-_evview(B))))...)
 end
 
 function (-)(A::SymTridiagonal, B::Bidiagonal)
-    newdv = A.dv - B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)-B.ev) : (_evview(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...)
+    newdv = _diagview(A) - B.dv
+    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview_transposed(A)), newdv, _evview(A)-B.ev) : (_evview_transposed(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...)
 end
 
 @commutative function (+)(A::Tridiagonal, B::UniformScaling)
@@ -256,7 +280,7 @@ function (-)(A::UniformScaling, B::Tridiagonal)
 end
 function (-)(A::UniformScaling, B::SymTridiagonal)
     dv = Ref(A) .- B.dv
-    SymTridiagonal(dv, convert(typeof(dv), -B.ev))
+    SymTridiagonal(dv, convert(typeof(dv), -_evview(B)))
 end
 function (-)(A::UniformScaling, B::Bidiagonal)
     dv = Ref(A) .- B.dv
@@ -266,6 +290,25 @@ function (-)(A::UniformScaling, B::Diagonal)
     Diagonal(Ref(A) .- B.diag)
 end
 
+for f in (:+, :-)
+    @eval function $f(D::Diagonal{<:Number}, S::Symmetric)
+        uplo = sym_uplo(S.uplo)
+        return Symmetric(parentof_applytri($f, Symmetric(D, uplo), S), uplo)
+    end
+    @eval function $f(S::Symmetric, D::Diagonal{<:Number})
+        uplo = sym_uplo(S.uplo)
+        return Symmetric(parentof_applytri($f, S, Symmetric(D, uplo)), uplo)
+    end
+    @eval function $f(D::Diagonal{<:Real}, H::Hermitian)
+        uplo = sym_uplo(H.uplo)
+        return Hermitian(parentof_applytri($f, Hermitian(D, uplo), H), uplo)
+    end
+    @eval function $f(H::Hermitian, D::Diagonal{<:Real})
+        uplo = sym_uplo(H.uplo)
+        return Hermitian(parentof_applytri($f, H, Hermitian(D, uplo)), uplo)
+    end
+end
+
 ## Diagonal construction from UniformScaling
 Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m))
 Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m)
@@ -286,7 +329,14 @@ _small_enough(A::Union{Diagonal, Bidiagonal}) = size(A, 1) <= 1
 _small_enough(A::Tridiagonal) = size(A, 1) <= 2
 _small_enough(A::SymTridiagonal) = size(A, 1) <= 2
 
-function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}, x)
+function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal}, x)
+    xT = convert(eltype(A), x)
+    (iszero(xT) || _small_enough(A)) && return fillstored!(A, xT)
+    throw(ArgumentError(lazy"array of type $(typeof(A)) and size $(size(A)) can
+    not be filled with $x, since some of its entries are constrained."))
+end
+function fill!(A::SymTridiagonal, x)
+    issymmetric(x) || throw(ArgumentError("cannot fill a SymTridiagonal with an asymmetric value"))
     xT = convert(eltype(A), x)
     (iszero(xT) || _small_enough(A)) && return fillstored!(A, xT)
     throw(ArgumentError(lazy"array of type $(typeof(A)) and size $(size(A)) can
@@ -320,20 +370,20 @@ function copyto!(dest::BandedMatrix, src::BandedMatrix)
 end
 function _copyto_banded!(T::Tridiagonal, D::Diagonal)
     T.d .= D.diag
-    T.dl .= zero.(T.dl)
-    T.du .= zero.(T.du)
+    T.dl .= view(D, diagind(D, -1, IndexStyle(D)))
+    T.du .= view(D, diagind(D,  1, IndexStyle(D)))
     return T
 end
 function _copyto_banded!(SymT::SymTridiagonal, D::Diagonal)
     issymmetric(D) || throw(ArgumentError("cannot copy a non-symmetric Diagonal matrix to a SymTridiagonal"))
     SymT.dv .= D.diag
     _ev = _evview(SymT)
-    _ev .= zero.(_ev)
+    _ev .= view(D, diagind(D,  1, IndexStyle(D)))
     return SymT
 end
 function _copyto_banded!(B::Bidiagonal, D::Diagonal)
     B.dv .= D.diag
-    B.ev .= zero.(B.ev)
+    B.ev .= view(D, diagind(D,  B.uplo == 'U' ? 1 : -1, IndexStyle(D)))
     return B
 end
 function _copyto_banded!(D::Diagonal, B::Bidiagonal)
@@ -361,10 +411,10 @@ function _copyto_banded!(T::Tridiagonal, B::Bidiagonal)
     T.d .= B.dv
     if B.uplo == 'U'
         T.du .= B.ev
-        T.dl .= zero.(T.dl)
+        T.dl .= view(B, diagind(B, -1, IndexStyle(B)))
     else
         T.dl .= B.ev
-        T.du .= zero.(T.du)
+        T.du .= view(B, diagind(B,  1, IndexStyle(B)))
     end
     return T
 end
@@ -372,7 +422,7 @@ function _copyto_banded!(SymT::SymTridiagonal, B::Bidiagonal)
     issymmetric(B) || throw(ArgumentError("cannot copy a non-symmetric Bidiagonal matrix to a SymTridiagonal"))
     SymT.dv .= B.dv
     _ev = _evview(SymT)
-    _ev .= zero.(_ev)
+    _ev .= B.ev
     return SymT
 end
 function _copyto_banded!(B::Bidiagonal, T::Tridiagonal)
@@ -399,7 +449,7 @@ end
 # SymTridiagonal == Tridiagonal is already defined in tridiag.jl
 
 ==(A::Diagonal, B::Bidiagonal) = iszero(B.ev) && A.diag == B.dv
-==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == B.dv
+==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == _diagview(B)
 ==(B::Bidiagonal, A::Diagonal) = A == B
 ==(A::Diagonal, B::Tridiagonal) = iszero(B.dl) && iszero(B.du) && A.diag == B.d
 ==(B::Tridiagonal, A::Diagonal) = A == B
@@ -413,7 +463,7 @@ function ==(A::Bidiagonal, B::Tridiagonal)
 end
 ==(B::Tridiagonal, A::Bidiagonal) = A == B
 
-==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == B.dv
+==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == _diagview(B)
 ==(B::SymTridiagonal, A::Bidiagonal) = A == B
 
 # TODO: remove these deprecations (used by SparseArrays in the past)
@@ -538,3 +588,7 @@ function cholesky(S::RealHermSymComplexHerm{<:Real,<:SymTridiagonal}, ::NoPivot
     B = Bidiagonal{T}(diag(S, 0), diag(S, S.uplo == 'U' ? 1 : -1), sym_uplo(S.uplo))
     cholesky!(Hermitian(B, sym_uplo(S.uplo)), NoPivot(); check = check)
 end
+
+# istriu/istril for triangular wrappers of structured matrices
+_istril(A::LowerTriangular{<:Any, <:BandedMatrix}, k) = istril(parent(A), k)
+_istriu(A::UpperTriangular{<:Any, <:BandedMatrix}, k) = istriu(parent(A), k)
diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
index 21f6a7414d872..0c06f84116fc7 100644
--- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
@@ -96,7 +96,7 @@ structured_broadcast_alloc(bc, ::Type{UnitLowerTriangular}, ::Type{ElType}, n) w
 structured_broadcast_alloc(bc, ::Type{UnitUpperTriangular}, ::Type{ElType}, n) where {ElType} =
     UnitUpperTriangular(Array{ElType}(undef, n, n))
 structured_broadcast_alloc(bc, ::Type{Matrix}, ::Type{ElType}, n) where {ElType} =
-    Matrix(Array{ElType}(undef, n, n))
+    Array{ElType}(undef, n, n)
 
 # A _very_ limited list of structure-preserving functions known at compile-time. This list is
 # derived from the formerly-implemented `broadcast` methods in 0.6. Note that this must
@@ -199,6 +199,8 @@ function Broadcast.newindex(A::StructuredMatrix, b::BandIndex)
     # and we apply newindex to both the axes at once to obtain the result
     size(A,1) > 1 ? b : BandIndex(0, 1)
 end
+# All structured matrices are square, and therefore they only broadcast out if they are size (1, 1)
+Broadcast.newindex(D::StructuredMatrix, I::CartesianIndex{2}) = size(D) == (1,1) ? CartesianIndex(1,1) : I
 
 function copyto!(dest::Diagonal, bc::Broadcasted{<:StructuredMatrixStyle})
     isvalidstructbc(dest, bc) || return copyto!(dest, convert(Broadcasted{Nothing}, bc))
diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl
index f58670e255b58..e17eb80d25453 100644
--- a/stdlib/LinearAlgebra/src/symmetric.jl
+++ b/stdlib/LinearAlgebra/src/symmetric.jl
@@ -12,7 +12,7 @@ struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
     end
 end
 """
-    Symmetric(A, uplo=:U)
+    Symmetric(A::AbstractMatrix, uplo::Symbol=:U)
 
 Construct a `Symmetric` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
 triangle of the matrix `A`.
@@ -63,7 +63,7 @@ function Symmetric(A::AbstractMatrix, uplo::Symbol=:U)
 end
 
 """
-    symmetric(A, uplo=:U)
+    symmetric(A, uplo::Symbol=:U)
 
 Construct a symmetric view of `A`. If `A` is a matrix, `uplo` controls whether the upper
 (if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the
@@ -105,7 +105,7 @@ struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
     end
 end
 """
-    Hermitian(A, uplo=:U)
+    Hermitian(A::AbstractMatrix, uplo::Symbol=:U)
 
 Construct a `Hermitian` view of the upper (if `uplo = :U`) or lower (if `uplo = :L`)
 triangle of the matrix `A`.
@@ -153,7 +153,7 @@ function Hermitian(A::AbstractMatrix, uplo::Symbol=:U)
 end
 
 """
-    hermitian(A, uplo=:U)
+    hermitian(A, uplo::Symbol=:U)
 
 Construct a hermitian view of `A`. If `A` is a matrix, `uplo` controls whether the upper
 (if `uplo = :U`) or lower (if `uplo = :L`) triangle of `A` is used to implicitly fill the
@@ -261,6 +261,7 @@ Base._reverse(A::Symmetric, ::Colon) = Symmetric(reverse(A.data), A.uplo == 'U'
 @propagate_inbounds function setindex!(A::Symmetric, v, i::Integer, j::Integer)
     i == j || throw(ArgumentError("Cannot set a non-diagonal index in a symmetric matrix"))
     setindex!(A.data, v, i, j)
+    return A
 end
 
 Base._reverse(A::Hermitian, dims) = reverse!(Matrix(A); dims)
@@ -274,6 +275,7 @@ Base._reverse(A::Hermitian, ::Colon) = Hermitian(reverse(A.data), A.uplo == 'U'
     else
         setindex!(A.data, v, i, j)
     end
+    return A
 end
 
 Base.dataids(A::HermOrSym) = Base.dataids(parent(A))
@@ -449,8 +451,8 @@ Base.copy(A::Adjoint{<:Any,<:Symmetric}) =
 Base.copy(A::Transpose{<:Any,<:Hermitian}) =
     Hermitian(copy(transpose(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U))
 
-tr(A::Symmetric) = tr(A.data) # to avoid AbstractMatrix fallback (incl. allocations)
-tr(A::Hermitian) = real(tr(A.data))
+tr(A::Symmetric{<:Number}) = tr(A.data) # to avoid AbstractMatrix fallback (incl. allocations)
+tr(A::Hermitian{<:Number}) = real(tr(A.data))
 
 Base.conj(A::Symmetric) = Symmetric(parentof_applytri(conj, A), sym_uplo(A.uplo))
 Base.conj(A::Hermitian) = Hermitian(parentof_applytri(conj, A), sym_uplo(A.uplo))
@@ -685,10 +687,10 @@ for f in (:+, :-)
     @eval begin
         $f(A::Hermitian, B::Symmetric{<:Real}) = $f(A, Hermitian(parent(B), sym_uplo(B.uplo)))
         $f(A::Symmetric{<:Real}, B::Hermitian) = $f(Hermitian(parent(A), sym_uplo(A.uplo)), B)
-        $f(A::SymTridiagonal, B::Symmetric) = Symmetric($f(A, B.data), sym_uplo(B.uplo))
-        $f(A::Symmetric, B::SymTridiagonal) = Symmetric($f(A.data, B), sym_uplo(A.uplo))
-        $f(A::SymTridiagonal{<:Real}, B::Hermitian) = Hermitian($f(A, B.data), sym_uplo(B.uplo))
-        $f(A::Hermitian, B::SymTridiagonal{<:Real}) = Hermitian($f(A.data, B), sym_uplo(A.uplo))
+        $f(A::SymTridiagonal, B::Symmetric) = $f(Symmetric(A, sym_uplo(B.uplo)), B)
+        $f(A::Symmetric, B::SymTridiagonal) = $f(A, Symmetric(B, sym_uplo(A.uplo)))
+        $f(A::SymTridiagonal{<:Real}, B::Hermitian) = $f(Hermitian(A, sym_uplo(B.uplo)), B)
+        $f(A::Hermitian, B::SymTridiagonal{<:Real}) = $f(A, Hermitian(B, sym_uplo(A.uplo)))
     end
 end
 
@@ -808,26 +810,32 @@ end
 # Matrix functions
 ^(A::Symmetric{<:Real}, p::Integer) = sympow(A, p)
 ^(A::Symmetric{<:Complex}, p::Integer) = sympow(A, p)
-function sympow(A::Symmetric, p::Integer)
-    if p < 0
-        return Symmetric(Base.power_by_squaring(inv(A), -p))
-    else
-        return Symmetric(Base.power_by_squaring(A, p))
-    end
-end
-function ^(A::Symmetric{<:Real}, p::Real)
-    isinteger(p) && return integerpow(A, p)
-    F = eigen(A)
-    if all(λ -> λ ≥ 0, F.values)
-        return Symmetric((F.vectors * Diagonal((F.values).^p)) * F.vectors')
-    else
-        return Symmetric((F.vectors * Diagonal((complex(F.values)).^p)) * F.vectors')
+^(A::SymTridiagonal{<:Real}, p::Integer) = sympow(A, p)
+^(A::SymTridiagonal{<:Complex}, p::Integer) = sympow(A, p)
+for hermtype in (:Symmetric, :SymTridiagonal)
+    @eval begin
+        function sympow(A::$hermtype, p::Integer)
+            if p < 0
+                return Symmetric(Base.power_by_squaring(inv(A), -p))
+            else
+                return Symmetric(Base.power_by_squaring(A, p))
+            end
+        end
+        function ^(A::$hermtype{<:Real}, p::Real)
+            isinteger(p) && return integerpow(A, p)
+            F = eigen(A)
+            if all(λ -> λ ≥ 0, F.values)
+                return Symmetric((F.vectors * Diagonal((F.values).^p)) * F.vectors')
+            else
+                return Symmetric((F.vectors * Diagonal(complex.(F.values).^p)) * F.vectors')
+            end
+        end
+        function ^(A::$hermtype{<:Complex}, p::Real)
+            isinteger(p) && return integerpow(A, p)
+            return Symmetric(schurpow(A, p))
+        end
     end
 end
-function ^(A::Symmetric{<:Complex}, p::Real)
-    isinteger(p) && return integerpow(A, p)
-    return Symmetric(schurpow(A, p))
-end
 function ^(A::Hermitian, p::Integer)
     if p < 0
         retmat = Base.power_by_squaring(inv(A), -p)
@@ -853,16 +861,25 @@ function ^(A::Hermitian{T}, p::Real) where T
             return Hermitian(retmat)
         end
     else
-        return (F.vectors * Diagonal((complex(F.values).^p))) * F.vectors'
+        retmat = (F.vectors * Diagonal((complex.(F.values).^p))) * F.vectors'
+        if T <: Real
+            return Symmetric(retmat)
+        else
+            return retmat
+        end
     end
 end
 
-for func in (:exp, :cos, :sin, :tan, :cosh, :sinh, :tanh, :atan, :asinh, :atanh)
-    @eval begin
-        function ($func)(A::HermOrSym{<:Real})
-            F = eigen(A)
-            return Symmetric((F.vectors * Diagonal(($func).(F.values))) * F.vectors')
+for func in (:exp, :cos, :sin, :tan, :cosh, :sinh, :tanh, :atan, :asinh, :atanh, :cbrt)
+    for (hermtype, wrapper) in [(:Symmetric, :Symmetric), (:SymTridiagonal, :Symmetric), (:Hermitian, :Hermitian)]
+        @eval begin
+            function ($func)(A::$hermtype{<:Real})
+                F = eigen(A)
+                return $wrapper((F.vectors * Diagonal(($func).(F.values))) * F.vectors')
+            end
         end
+    end
+    @eval begin
         function ($func)(A::Hermitian{<:Complex})
             n = checksquare(A)
             F = eigen(A)
@@ -875,23 +892,34 @@ for func in (:exp, :cos, :sin, :tan, :cosh, :sinh, :tanh, :atan, :asinh, :atanh)
     end
 end
 
-function cis(A::Union{RealHermSymComplexHerm,SymTridiagonal{<:Real}})
+for wrapper in (:Symmetric, :Hermitian, :SymTridiagonal)
+    @eval begin
+        function cis(A::$wrapper{<:Real})
+            F = eigen(A)
+            return Symmetric(F.vectors .* cis.(F.values') * F.vectors')
+        end
+    end
+end
+function cis(A::Hermitian{<:Complex})
     F = eigen(A)
-    # The returned matrix is unitary, and is complex-symmetric for real A
     return F.vectors .* cis.(F.values') * F.vectors'
 end
 
+
 for func in (:acos, :asin)
-    @eval begin
-        function ($func)(A::HermOrSym{<:Real})
-            F = eigen(A)
-            if all(λ -> -1 ≤ λ ≤ 1, F.values)
-                retmat = (F.vectors * Diagonal(($func).(F.values))) * F.vectors'
-            else
-                retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors'
+    for (hermtype, wrapper) in [(:Symmetric, :Symmetric), (:SymTridiagonal, :Symmetric), (:Hermitian, :Hermitian)]
+        @eval begin
+            function ($func)(A::$hermtype{<:Real})
+                F = eigen(A)
+                if all(λ -> -1 ≤ λ ≤ 1, F.values)
+                    return $wrapper((F.vectors * Diagonal(($func).(F.values))) * F.vectors')
+                else
+                    return Symmetric((F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors')
+                end
             end
-            return Symmetric(retmat)
         end
+    end
+    @eval begin
         function ($func)(A::Hermitian{<:Complex})
             n = checksquare(A)
             F = eigen(A)
@@ -908,14 +936,17 @@ for func in (:acos, :asin)
     end
 end
 
-function acosh(A::HermOrSym{<:Real})
-    F = eigen(A)
-    if all(λ -> λ ≥ 1, F.values)
-        retmat = (F.vectors * Diagonal(acosh.(F.values))) * F.vectors'
-    else
-        retmat = (F.vectors * Diagonal(acosh.(complex.(F.values)))) * F.vectors'
+for (hermtype, wrapper) in [(:Symmetric, :Symmetric), (:SymTridiagonal, :Symmetric), (:Hermitian, :Hermitian)]
+    @eval begin
+        function acosh(A::$hermtype{<:Real})
+            F = eigen(A)
+            if all(λ -> λ ≥ 1, F.values)
+                return $wrapper((F.vectors * Diagonal(acosh.(F.values))) * F.vectors')
+            else
+                return Symmetric((F.vectors * Diagonal(acosh.(complex.(F.values)))) * F.vectors')
+            end
+        end
     end
-    return Symmetric(retmat)
 end
 function acosh(A::Hermitian{<:Complex})
     n = checksquare(A)
@@ -931,14 +962,18 @@ function acosh(A::Hermitian{<:Complex})
     end
 end
 
-function sincos(A::HermOrSym{<:Real})
-    n = checksquare(A)
-    F = eigen(A)
-    S, C = Diagonal(similar(A, (n,))), Diagonal(similar(A, (n,)))
-    for i in 1:n
-        S.diag[i], C.diag[i] = sincos(F.values[i])
+for (hermtype, wrapper) in [(:Symmetric, :Symmetric), (:SymTridiagonal, :Symmetric), (:Hermitian, :Hermitian)]
+    @eval begin
+        function sincos(A::$hermtype{<:Real})
+            n = checksquare(A)
+            F = eigen(A)
+            S, C = Diagonal(similar(A, (n,))), Diagonal(similar(A, (n,)))
+            for i in 1:n
+                S.diag[i], C.diag[i] = sincos(F.values[i])
+            end
+            return $wrapper((F.vectors * S) * F.vectors'), $wrapper((F.vectors * C) * F.vectors')
+        end
     end
-    return Symmetric((F.vectors * S) * F.vectors'), Symmetric((F.vectors * C) * F.vectors')
 end
 function sincos(A::Hermitian{<:Complex})
     n = checksquare(A)
@@ -960,18 +995,20 @@ for func in (:log, :sqrt)
     # sqrt has rtol arg to handle matrices that are semidefinite up to roundoff errors
     rtolarg = func === :sqrt ? Any[Expr(:kw, :(rtol::Real), :(eps(real(float(one(T))))*size(A,1)))] : Any[]
     rtolval = func === :sqrt ? :(-maximum(abs, F.values) * rtol) : 0
-    @eval begin
-        function ($func)(A::HermOrSym{T}; $(rtolarg...)) where {T<:Real}
-            F = eigen(A)
-            λ₀ = $rtolval # treat λ ≥ λ₀ as "zero" eigenvalues up to roundoff
-            if all(λ -> λ ≥ λ₀, F.values)
-                retmat = (F.vectors * Diagonal(($func).(max.(0, F.values)))) * F.vectors'
-            else
-                retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors'
+    for (hermtype, wrapper) in [(:Symmetric, :Symmetric), (:SymTridiagonal, :Symmetric), (:Hermitian, :Hermitian)]
+        @eval begin
+            function ($func)(A::$hermtype{T}; $(rtolarg...)) where {T<:Real}
+                F = eigen(A)
+                λ₀ = $rtolval # treat λ ≥ λ₀ as "zero" eigenvalues up to roundoff
+                if all(λ -> λ ≥ λ₀, F.values)
+                    return $wrapper((F.vectors * Diagonal(($func).(max.(0, F.values)))) * F.vectors')
+                else
+                    return Symmetric((F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors')
+                end
             end
-            return Symmetric(retmat)
         end
-
+    end
+    @eval begin
         function ($func)(A::Hermitian{T}; $(rtolarg...)) where {T<:Complex}
             n = checksquare(A)
             F = eigen(A)
@@ -983,22 +1020,15 @@ for func in (:log, :sqrt)
                 end
                 return Hermitian(retmat)
             else
-                retmat = (F.vectors * Diagonal(($func).(complex(F.values)))) * F.vectors'
+                retmat = (F.vectors * Diagonal(($func).(complex.(F.values)))) * F.vectors'
                 return retmat
             end
         end
     end
 end
 
-# Cube root of a real-valued symmetric matrix
-function cbrt(A::HermOrSym{<:Real})
-    F = eigen(A)
-    A = F.vectors * Diagonal(cbrt.(F.values)) * F.vectors'
-    return A
-end
-
 """
-    hermitianpart(A, uplo=:U) -> Hermitian
+    hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) -> Hermitian
 
 Return the Hermitian part of the square matrix `A`, defined as `(A + A') / 2`, as a
 [`Hermitian`](@ref) matrix. For real matrices `A`, this is also known as the symmetric part
@@ -1014,7 +1044,7 @@ See also [`hermitianpart!`](@ref) for the corresponding in-place operation.
 hermitianpart(A::AbstractMatrix, uplo::Symbol=:U) = Hermitian(_hermitianpart(A), uplo)
 
 """
-    hermitianpart!(A, uplo=:U) -> Hermitian
+    hermitianpart!(A::AbstractMatrix, uplo::Symbol=:U) -> Hermitian
 
 Overwrite the square matrix `A` in-place with its Hermitian part `(A + A') / 2`, and return
 [`Hermitian(A, uplo)`](@ref). For real matrices `A`, this is also known as the symmetric
diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl
index 666b9a9bc81df..68a1b29f5dbc7 100644
--- a/stdlib/LinearAlgebra/src/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl
@@ -4,6 +4,7 @@
 # Call `copytrito!` instead of `copy_similar` to only copy the matching triangular half
 eigencopy_oftype(A::Hermitian, S) = Hermitian(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
 eigencopy_oftype(A::Symmetric, S) = Symmetric(copytrito!(similar(parent(A), S, size(A)), A.data, A.uplo), sym_uplo(A.uplo))
+eigencopy_oftype(A::Symmetric{<:Complex}, S) = copyto!(similar(parent(A), S), A)
 
 default_eigen_alg(A) = DivideAndConquer()
 
@@ -19,13 +20,6 @@ function eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, alg::Algo
         throw(ArgumentError("Unsupported value for `alg` keyword."))
     end
 end
-function eigen(A::RealHermSymComplexHerm{Float16}; sortby::Union{Function,Nothing}=nothing)
-    S = eigtype(eltype(A))
-    E = eigen!(eigencopy_oftype(A, S), sortby=sortby)
-    values = convert(AbstractVector{Float16}, E.values)
-    vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors)
-    return Eigen(values, vectors)
-end
 
 """
     eigen(A::Union{Hermitian, Symmetric}, alg::Algorithm = default_eigen_alg(A)) -> Eigen
@@ -52,10 +46,22 @@ The default `alg` used may change in the future.
 The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
 """
 function eigen(A::RealHermSymComplexHerm, alg::Algorithm = default_eigen_alg(A); sortby::Union{Function,Nothing}=nothing)
+    _eigen(A, alg; sortby)
+end
+
+# we dispatch on the eltype in an internal method to avoid ambiguities
+function _eigen(A::RealHermSymComplexHerm, alg::Algorithm; sortby)
     S = eigtype(eltype(A))
     eigen!(eigencopy_oftype(A, S), alg; sortby)
 end
 
+function _eigen(A::RealHermSymComplexHerm{Float16}, alg::Algorithm; sortby::Union{Function,Nothing}=nothing)
+    S = eigtype(eltype(A))
+    E = eigen!(eigencopy_oftype(A, S), alg, sortby=sortby)
+    values = convert(AbstractVector{Float16}, E.values)
+    vectors = convert(AbstractMatrix{isreal(E.vectors) ? Float16 : Complex{Float16}}, E.vectors)
+    return Eigen(values, vectors)
+end
 
 eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
     Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl
index c45db3e90fab2..e1d61e4035966 100644
--- a/stdlib/LinearAlgebra/src/triangular.jl
+++ b/stdlib/LinearAlgebra/src/triangular.jl
@@ -236,6 +236,20 @@ Base.isstored(A::UpperTriangular, i::Int, j::Int) =
 @propagate_inbounds getindex(A::UpperTriangular, i::Int, j::Int) =
     i <= j ? A.data[i,j] : _zero(A.data,j,i)
 
+# these specialized getindex methods enable constant-propagation of the band
+Base.@constprop :aggressive @propagate_inbounds function getindex(A::UnitLowerTriangular{T}, b::BandIndex) where {T}
+    b.band < 0 ? A.data[b] : ifelse(b.band == 0, oneunit(T), zero(T))
+end
+Base.@constprop :aggressive @propagate_inbounds function getindex(A::LowerTriangular, b::BandIndex)
+    b.band <= 0 ? A.data[b] : _zero(A.data, b)
+end
+Base.@constprop :aggressive @propagate_inbounds function getindex(A::UnitUpperTriangular{T}, b::BandIndex) where {T}
+    b.band > 0 ? A.data[b] : ifelse(b.band == 0, oneunit(T), zero(T))
+end
+Base.@constprop :aggressive @propagate_inbounds function getindex(A::UpperTriangular, b::BandIndex)
+    b.band >= 0 ? A.data[b] : _zero(A.data, b)
+end
+
 _zero_triangular_half_str(::Type{<:UpperOrUnitUpperTriangular}) = "lower"
 _zero_triangular_half_str(::Type{<:LowerOrUnitLowerTriangular}) = "upper"
 
@@ -330,14 +344,32 @@ function Base.replace_in_print_matrix(A::Union{LowerTriangular,UnitLowerTriangul
     return i >= j ? s : Base.replace_with_centered_mark(s)
 end
 
-Base.@constprop :aggressive function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0)
+istril(A::UnitLowerTriangular, k::Integer=0) = k >= 0
+istriu(A::UnitUpperTriangular, k::Integer=0) = k <= 0
+Base.@constprop :aggressive function istril(A::LowerTriangular, k::Integer=0)
     k >= 0 && return true
     return _istril(A, k)
 end
-Base.@constprop :aggressive function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0)
+@inline function _istril(A::LowerTriangular, k)
+    P = parent(A)
+    m = size(A, 1)
+    for j in max(1, k + 2):m
+        all(iszero, view(P, j:min(j - k - 1, m), j)) || return false
+    end
+    return true
+end
+Base.@constprop :aggressive function istriu(A::UpperTriangular, k::Integer=0)
     k <= 0 && return true
     return _istriu(A, k)
 end
+@inline function _istriu(A::UpperTriangular, k)
+    P = parent(A)
+    m = size(A, 1)
+    for j in 1:min(m, m + k - 1)
+        all(iszero, view(P, max(1, j - k + 1):j, j)) || return false
+    end
+    return true
+end
 istril(A::Adjoint, k::Integer=0) = istriu(A.parent, -k)
 istril(A::Transpose, k::Integer=0) = istriu(A.parent, -k)
 istriu(A::Adjoint, k::Integer=0) = istril(A.parent, -k)
@@ -702,6 +734,43 @@ function _triscale!(A::LowerOrUnitLowerTriangular, c::Number, B::UnitLowerTriang
     return A
 end
 
+function _trirdiv!(A::UpperTriangular, B::UpperOrUnitUpperTriangular, c::Number)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in 1:j
+            @inbounds A[i, j] = B[i, j] / c
+        end
+    end
+    return A
+end
+function _trirdiv!(A::LowerTriangular, B::LowerOrUnitLowerTriangular, c::Number)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in j:n
+            @inbounds A[i, j] = B[i, j] / c
+        end
+    end
+    return A
+end
+function _trildiv!(A::UpperTriangular, c::Number, B::UpperOrUnitUpperTriangular)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in 1:j
+            @inbounds A[i, j] = c \ B[i, j]
+        end
+    end
+    return A
+end
+function _trildiv!(A::LowerTriangular, c::Number, B::LowerOrUnitLowerTriangular)
+    n = checksize1(A, B)
+    for j in 1:n
+        for i in j:n
+            @inbounds A[i, j] = c \ B[i, j]
+        end
+    end
+    return A
+end
+
 rmul!(A::UpperOrLowerTriangular, c::Number) = @inline _triscale!(A, A, c, MulAddMul())
 lmul!(c::Number, A::UpperOrLowerTriangular) = @inline _triscale!(A, c, A, MulAddMul())
 
@@ -795,51 +864,90 @@ fillstored!(A::UpperTriangular, x)     = (fillband!(A.data, x, 0, size(A,2)-1);
 fillstored!(A::UnitUpperTriangular, x) = (fillband!(A.data, x, 1, size(A,2)-1); A)
 
 # Binary operations
-+(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data + B.data)
-+(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data + B.data)
-+(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data + triu(B.data, 1) + I)
-+(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data + tril(B.data, -1) + I)
-+(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) + B.data + I)
-+(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) + B.data + I)
-+(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) + triu(B.data, 1) + 2I)
-+(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I)
+# use broadcasting if the parents are strided, where we loop only over the triangular part
+function +(A::UpperTriangular, B::UpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    UpperTriangular(A.data + B.data)
+end
+function +(A::LowerTriangular, B::LowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    LowerTriangular(A.data + B.data)
+end
+function +(A::UpperTriangular, B::UnitUpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    UpperTriangular(A.data + triu(B.data, 1) + I)
+end
+function +(A::LowerTriangular, B::UnitLowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    LowerTriangular(A.data + tril(B.data, -1) + I)
+end
+function +(A::UnitUpperTriangular, B::UpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    UpperTriangular(triu(A.data, 1) + B.data + I)
+end
+function +(A::UnitLowerTriangular, B::LowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    LowerTriangular(tril(A.data, -1) + B.data + I)
+end
+function +(A::UnitUpperTriangular, B::UnitUpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    UpperTriangular(triu(A.data, 1) + triu(B.data, 1) + 2I)
+end
+function +(A::UnitLowerTriangular, B::UnitLowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .+ B
+    LowerTriangular(tril(A.data, -1) + tril(B.data, -1) + 2I)
+end
 +(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) + copyto!(similar(parent(B)), B)
 
--(A::UpperTriangular, B::UpperTriangular) = UpperTriangular(A.data - B.data)
--(A::LowerTriangular, B::LowerTriangular) = LowerTriangular(A.data - B.data)
--(A::UpperTriangular, B::UnitUpperTriangular) = UpperTriangular(A.data - triu(B.data, 1) - I)
--(A::LowerTriangular, B::UnitLowerTriangular) = LowerTriangular(A.data - tril(B.data, -1) - I)
--(A::UnitUpperTriangular, B::UpperTriangular) = UpperTriangular(triu(A.data, 1) - B.data + I)
--(A::UnitLowerTriangular, B::LowerTriangular) = LowerTriangular(tril(A.data, -1) - B.data + I)
--(A::UnitUpperTriangular, B::UnitUpperTriangular) = UpperTriangular(triu(A.data, 1) - triu(B.data, 1))
--(A::UnitLowerTriangular, B::UnitLowerTriangular) = LowerTriangular(tril(A.data, -1) - tril(B.data, -1))
--(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B)
-
-# use broadcasting if the parents are strided, where we loop only over the triangular part
-for op in (:+, :-)
-    for TM1 in (:LowerTriangular, :UnitLowerTriangular), TM2 in (:LowerTriangular, :UnitLowerTriangular)
-        @eval $op(A::$TM1{<:Any, <:StridedMaybeAdjOrTransMat}, B::$TM2{<:Any, <:StridedMaybeAdjOrTransMat}) = broadcast($op, A, B)
-    end
-    for TM1 in (:UpperTriangular, :UnitUpperTriangular), TM2 in (:UpperTriangular, :UnitUpperTriangular)
-        @eval $op(A::$TM1{<:Any, <:StridedMaybeAdjOrTransMat}, B::$TM2{<:Any, <:StridedMaybeAdjOrTransMat}) = broadcast($op, A, B)
-    end
+function -(A::UpperTriangular, B::UpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    UpperTriangular(A.data - B.data)
+end
+function -(A::LowerTriangular, B::LowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    LowerTriangular(A.data - B.data)
 end
+function -(A::UpperTriangular, B::UnitUpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    UpperTriangular(A.data - triu(B.data, 1) - I)
+end
+function -(A::LowerTriangular, B::UnitLowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    LowerTriangular(A.data - tril(B.data, -1) - I)
+end
+function -(A::UnitUpperTriangular, B::UpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    UpperTriangular(triu(A.data, 1) - B.data + I)
+end
+function -(A::UnitLowerTriangular, B::LowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    LowerTriangular(tril(A.data, -1) - B.data + I)
+end
+function -(A::UnitUpperTriangular, B::UnitUpperTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    UpperTriangular(triu(A.data, 1) - triu(B.data, 1))
+end
+function -(A::UnitLowerTriangular, B::UnitLowerTriangular)
+    (parent(A) isa StridedMatrix || parent(B) isa StridedMatrix) && return A .- B
+    LowerTriangular(tril(A.data, -1) - tril(B.data, -1))
+end
+-(A::AbstractTriangular, B::AbstractTriangular) = copyto!(similar(parent(A)), A) - copyto!(similar(parent(B)), B)
 
-function kron(A::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
-    C = UpperTriangular(Matrix{promote_op(*, eltype(A), eltype(B))}(undef, _kronsize(A, B)))
+function kron(A::UpperTriangular{T,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{S,<:StridedMaybeAdjOrTransMat}) where {T,S}
+    C = UpperTriangular(Matrix{promote_op(*, T, S)}(undef, _kronsize(A, B)))
     return kron!(C, A, B)
 end
-function kron(A::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
-    C = LowerTriangular(Matrix{promote_op(*, eltype(A), eltype(B))}(undef, _kronsize(A, B)))
+function kron(A::LowerTriangular{T,<:StridedMaybeAdjOrTransMat}, B::LowerTriangular{S,<:StridedMaybeAdjOrTransMat}) where {T,S}
+    C = LowerTriangular(Matrix{promote_op(*, T, S)}(undef, _kronsize(A, B)))
     return kron!(C, A, B)
 end
 
-function kron!(C::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, A::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
+function kron!(C::UpperTriangular{<:Any,<:StridedMaybeAdjOrTransMat}, A::UpperTriangular{<:Any,<:StridedMaybeAdjOrTransMat}, B::UpperTriangular{<:Any,<:StridedMaybeAdjOrTransMat})
     size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!"))
     _triukron!(C.data, A.data, B.data)
     return C
 end
-function kron!(C::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, A::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat}, B::LowerTriangular{<:Number,<:StridedMaybeAdjOrTransMat})
+function kron!(C::LowerTriangular{<:Any,<:StridedMaybeAdjOrTransMat}, A::LowerTriangular{<:Any,<:StridedMaybeAdjOrTransMat}, B::LowerTriangular{<:Any,<:StridedMaybeAdjOrTransMat})
     size(C) == _kronsize(A, B) || throw(DimensionMismatch("kron!"))
     _trilkron!(C.data, A.data, B.data)
     return C
@@ -858,7 +966,7 @@ function _triukron!(C, A, B)
                     C[inB+k, jnB+l] = Aij * B[k, l]
                 end
                 for k = 1:(l-1)
-                    C[inB+l, jnB+k] = zero(eltype(C))
+                    C[inB+l, jnB+k] = zero(C[inB+k, jnB+l])
                 end
             end
         end
@@ -890,7 +998,7 @@ function _trilkron!(C, A, B)
                     C[inB+k, jnB+l] = Aij * B[k, l]
                 end
                 for k = (l+1):n_B
-                    C[inB+l, jnB+k] = zero(eltype(C))
+                    C[inB+l, jnB+k] = zero(C[inB+k, jnB+l])
                 end
             end
         end
@@ -916,8 +1024,6 @@ isunit_char(::UnitUpperTriangular) = 'U'
 isunit_char(::LowerTriangular) = 'N'
 isunit_char(::UnitLowerTriangular) = 'U'
 
-lmul!(A::Tridiagonal, B::AbstractTriangular) = A*full!(B)
-
 # generic fallback for AbstractTriangular matrices outside of the four subtypes provided here
 _trimul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVector) =
     lmul!(A, copyto!(C, B))
@@ -942,9 +1048,20 @@ _trimul!(C::AbstractMatrix, A::UpperOrLowerTriangular, B::AbstractTriangular) =
 _trimul!(C::AbstractMatrix, A::AbstractTriangular, B::UpperOrLowerTriangular) =
     generic_mattrimul!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
 
-lmul!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _trimul!(B, A, B)
-rmul!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _trimul!(A, A, B)
-
+function lmul!(A::AbstractTriangular, B::AbstractVecOrMat)
+    if istriu(A)
+        _trimul!(B, UpperTriangular(A), B)
+    else
+        _trimul!(B, LowerTriangular(A), B)
+    end
+end
+function rmul!(A::AbstractMatrix, B::AbstractTriangular)
+    if istriu(B)
+        _trimul!(A, A, UpperTriangular(B))
+    else
+        _trimul!(A, A, LowerTriangular(B))
+    end
+end
 
 for TC in (:AbstractVector, :AbstractMatrix)
     @eval @inline function _mul!(C::$TC, A::AbstractTriangular, B::AbstractVector, alpha::Number, beta::Number)
@@ -980,8 +1097,20 @@ _ldiv!(C::AbstractVecOrMat, A::UpperOrLowerTriangular, B::AbstractVecOrMat) =
 _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::UpperOrLowerTriangular) =
     generic_mattridiv!(C, uplo_char(B), isunit_char(B), wrapperop(parent(B)), A, _unwrap_at(parent(B)))
 
-ldiv!(A::AbstractTriangular, B::AbstractVecOrMat) = @inline _ldiv!(B, A, B)
-rdiv!(A::AbstractMatrix, B::AbstractTriangular)   = @inline _rdiv!(A, A, B)
+function ldiv!(A::AbstractTriangular, B::AbstractVecOrMat)
+    if istriu(A)
+        _ldiv!(B, UpperTriangular(A), B)
+    else
+        _ldiv!(B, LowerTriangular(A), B)
+    end
+end
+function rdiv!(A::AbstractMatrix, B::AbstractTriangular)
+    if istriu(B)
+        _rdiv!(A, A, UpperTriangular(B))
+    else
+        _rdiv!(A, A, LowerTriangular(B))
+    end
+end
 
 # preserve triangular structure in in-place multiplication/division
 for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
@@ -1095,7 +1224,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
     tstrided = t{<:Any, <:StridedMaybeAdjOrTransMat}
     @eval begin
         (*)(A::$t, x::Number) = $t(A.data*x)
-        (*)(A::$tstrided, x::Number) = A .* x
+        function (*)(A::$tstrided, x::Number)
+            eltype_dest = promote_op(*, eltype(A), typeof(x))
+            dest = $t(similar(parent(A), eltype_dest))
+            _triscale!(dest, x, A, MulAddMul())
+        end
 
         function (*)(A::$unitt, x::Number)
             B = $t(A.data)*x
@@ -1106,7 +1239,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
         end
 
         (*)(x::Number, A::$t) = $t(x*A.data)
-        (*)(x::Number, A::$tstrided) = x .* A
+        function (*)(x::Number, A::$tstrided)
+            eltype_dest = promote_op(*, typeof(x), eltype(A))
+            dest = $t(similar(parent(A), eltype_dest))
+            _triscale!(dest, x, A, MulAddMul())
+        end
 
         function (*)(x::Number, A::$unitt)
             B = x*$t(A.data)
@@ -1117,7 +1254,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
         end
 
         (/)(A::$t, x::Number) = $t(A.data/x)
-        (/)(A::$tstrided, x::Number) = A ./ x
+        function (/)(A::$tstrided, x::Number)
+            eltype_dest = promote_op(/, eltype(A), typeof(x))
+            dest = $t(similar(parent(A), eltype_dest))
+            _trirdiv!(dest, A,  x)
+        end
 
         function (/)(A::$unitt, x::Number)
             B = $t(A.data)/x
@@ -1129,7 +1270,11 @@ for (t, unitt) in ((UpperTriangular, UnitUpperTriangular),
         end
 
         (\)(x::Number, A::$t) = $t(x\A.data)
-        (\)(x::Number, A::$tstrided) = x .\ A
+        function (\)(x::Number, A::$tstrided)
+            eltype_dest = promote_op(\, typeof(x), eltype(A))
+            dest = $t(similar(parent(A), eltype_dest))
+            _trildiv!(dest, x, A)
+        end
 
         function (\)(x::Number, A::$unitt)
             B = x\$t(A.data)
@@ -2690,6 +2835,14 @@ end
 
 # Generic eigensystems
 eigvals(A::AbstractTriangular) = diag(A)
+# fallback for unknown types
+function eigvecs(A::AbstractTriangular{<:BlasFloat})
+    if istriu(A)
+        eigvecs(UpperTriangular(Matrix(A)))
+    else # istril(A)
+        eigvecs(LowerTriangular(Matrix(A)))
+    end
+end
 function eigvecs(A::AbstractTriangular{T}) where T
     TT = promote_type(T, Float32)
     if TT <: BlasFloat
diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl
index 3198e45ad3eb8..e75e1e5eefb3d 100644
--- a/stdlib/LinearAlgebra/src/tridiag.jl
+++ b/stdlib/LinearAlgebra/src/tridiag.jl
@@ -173,7 +173,7 @@ adjoint(S::SymTridiagonal{<:Number, <:Base.ReshapedArray{<:Number,1,<:Adjoint}})
 
 permutedims(S::SymTridiagonal) = S
 function permutedims(S::SymTridiagonal, perm)
-    Base.checkdims_perm(S, S, perm)
+    Base.checkdims_perm(axes(S), axes(S), perm)
     NTuple{2}(perm) == (2, 1) ? permutedims(S) : S
 end
 Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(adjoint.(x)), (S.parent.dv, S.parent.ev))...)
@@ -181,7 +181,7 @@ Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(ad
 ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(_evview(S))
 issymmetric(S::SymTridiagonal) = true
 
-tr(S::SymTridiagonal) = sum(S.dv)
+tr(S::SymTridiagonal) = sum(symmetric, S.dv)
 
 @noinline function throw_diag_outofboundserror(n, sz)
     sz1, sz2 = sz
@@ -198,7 +198,11 @@ function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number
     elseif absn == 1
         return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
     elseif absn <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-absn), zero(T))
+        v = similar(M.dv, size(M,1)-absn)
+        for i in eachindex(v)
+            v[i] = M[BandIndex(n,i)]
+        end
+        return v
     else
         throw_diag_outofboundserror(n, size(M))
     end
@@ -224,6 +228,29 @@ end
 -(A::SymTridiagonal) = SymTridiagonal(-A.dv, -A.ev)
 *(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv*B, A.ev*B)
 *(B::Number, A::SymTridiagonal) = SymTridiagonal(B*A.dv, B*A.ev)
+function rmul!(A::SymTridiagonal, x::Number)
+    if size(A,1) > 2
+        # ensure that zeros are preserved on scaling
+        y = A[3,1] * x
+        iszero(y) || throw(ArgumentError(LazyString("cannot set index (3, 1) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
+    end
+    A.dv .*= x
+    _evview(A) .*= x
+    return A
+end
+function lmul!(x::Number, B::SymTridiagonal)
+    if size(B,1) > 2
+        # ensure that zeros are preserved on scaling
+        y = x * B[3,1]
+        iszero(y) || throw(ArgumentError(LazyString("cannot set index (3, 1) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
+    end
+    @. B.dv = x * B.dv
+    ev = _evview(B)
+    @. ev = x * ev
+    return B
+end
 /(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv/B, A.ev/B)
 \(B::Number, A::SymTridiagonal) = SymTridiagonal(B\A.dv, B\A.ev)
 ==(A::SymTridiagonal{<:Number}, B::SymTridiagonal{<:Number}) =
@@ -292,8 +319,6 @@ eigmax(A::SymTridiagonal) = eigvals(A, size(A, 1):size(A, 1))[1]
 eigmin(A::SymTridiagonal) = eigvals(A, 1:1)[1]
 
 #Compute selected eigenvectors only corresponding to particular eigenvalues
-eigvecs(A::SymTridiagonal) = eigen(A).vectors
-
 """
     eigvecs(A::SymTridiagonal[, eigvals]) -> Matrix
 
@@ -368,7 +393,7 @@ function tril!(M::SymTridiagonal{T}, k::Integer=0) where T
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
     elseif k == 0
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
-    elseif k >= 1
+    else # if k >= 1
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     end
 end
@@ -387,7 +412,7 @@ function triu!(M::SymTridiagonal{T}, k::Integer=0) where T
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
     elseif k == 0
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
-    elseif k <= -1
+    else # if k <= -1
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     end
 end
@@ -472,11 +497,12 @@ Base._reverse!(A::SymTridiagonal, dims::Colon) = (reverse!(A.dv); reverse!(A.ev)
 @inline function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
+        issymmetric(x) || throw(ArgumentError("cannot set a diagonal entry of a SymTridiagonal to an asymmetric value"))
         @inbounds A.dv[i] = x
     else
         throw(ArgumentError(lazy"cannot set off-diagonal entry ($i, $j)"))
     end
-    return x
+    return A
 end
 
 ## Tridiagonal matrices ##
@@ -639,7 +665,7 @@ adjoint(S::Tridiagonal{<:Number, <:Base.ReshapedArray{<:Number,1,<:Adjoint}}) =
 transpose(S::Tridiagonal{<:Number}) = Tridiagonal(S.du, S.d, S.dl)
 permutedims(T::Tridiagonal) = Tridiagonal(T.du, T.d, T.dl)
 function permutedims(T::Tridiagonal, perm)
-    Base.checkdims_perm(T, T, perm)
+    Base.checkdims_perm(axes(T), axes(T), perm)
     NTuple{2}(perm) == (2, 1) ? permutedims(T) : T
 end
 Base.copy(aS::Adjoint{<:Any,<:Tridiagonal}) = (S = aS.parent; Tridiagonal(map(x -> copy.(adjoint.(x)), (S.du, S.d, S.dl))...))
@@ -650,7 +676,7 @@ issymmetric(S::Tridiagonal) = all(issymmetric, S.d) && all(Iterators.map((x, y)
 
 \(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:AbstractVecOrMat}) = copy(A) \ B
 
-function diag(M::Tridiagonal{T}, n::Integer=0) where T
+function diag(M::Tridiagonal, n::Integer=0)
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of n
     if n == 0
@@ -660,7 +686,11 @@ function diag(M::Tridiagonal{T}, n::Integer=0) where T
     elseif n == 1
         return copyto!(similar(M.du, length(M.du)), M.du)
     elseif abs(n) <= size(M,1)
-        return fill!(similar(M.d, size(M,1)-abs(n)), zero(T))
+        v = similar(M.d, size(M,1)-abs(n))
+        for i in eachindex(v)
+            v[i] = M[BandIndex(n,i)]
+        end
+        return v
     else
         throw(ArgumentError(LazyString(lazy"requested diagonal, $n, must be at least $(-size(M, 1)) ",
             lazy"and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
@@ -731,7 +761,7 @@ end
         throw(ArgumentError(LazyString(lazy"cannot set entry ($i, $j) off ",
             lazy"the tridiagonal band to a nonzero value ($x)")))
     end
-    return x
+    return A
 end
 
 ## structured matrix methods ##
@@ -828,6 +858,30 @@ tr(M::Tridiagonal) = sum(M.d)
 -(A::Tridiagonal) = Tridiagonal(-A.dl, -A.d, -A.du)
 *(A::Tridiagonal, B::Number) = Tridiagonal(A.dl*B, A.d*B, A.du*B)
 *(B::Number, A::Tridiagonal) = Tridiagonal(B*A.dl, B*A.d, B*A.du)
+function rmul!(T::Tridiagonal, x::Number)
+    if size(T,1) > 2
+        # ensure that zeros are preserved on scaling
+        y = T[3,1] * x
+        iszero(y) || throw(ArgumentError(LazyString("cannot set index (3, 1) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
+    end
+    T.dl .*= x
+    T.d .*= x
+    T.du .*= x
+    return T
+end
+function lmul!(x::Number, T::Tridiagonal)
+    if size(T,1) > 2
+        # ensure that zeros are preserved on scaling
+        y = x * T[3,1]
+        iszero(y) || throw(ArgumentError(LazyString("cannot set index (3, 1) off ",
+            lazy"the tridiagonal band to a nonzero value ($y)")))
+    end
+    @. T.dl = x * T.dl
+    @. T.d = x * T.d
+    @. T.du = x * T.du
+    return T
+end
 /(A::Tridiagonal, B::Number) = Tridiagonal(A.dl/B, A.d/B, A.du/B)
 \(B::Number, A::Tridiagonal) = Tridiagonal(B\A.dl, B\A.d, B\A.du)
 
@@ -1040,8 +1094,26 @@ function _copyto_banded!(A::Tridiagonal, B::SymTridiagonal)
     return A
 end
 function _copyto_banded!(A::SymTridiagonal, B::Tridiagonal)
-    issymmetric(B) || throw(ArgumentError("cannot copy a non-symmetric Tridiagonal matrix to a SymTridiagonal"))
+    issymmetric(B) || throw(ArgumentError("cannot copy an asymmetric Tridiagonal matrix to a SymTridiagonal"))
     A.dv .= B.d
     _evview(A) .= B.du
     return A
 end
+
+# display
+function show(io::IO, T::Tridiagonal)
+    print(io, "Tridiagonal(")
+    show(io, T.dl)
+    print(io, ", ")
+    show(io, T.d)
+    print(io, ", ")
+    show(io, T.du)
+    print(io, ")")
+end
+function show(io::IO, S::SymTridiagonal)
+    print(io, "SymTridiagonal(")
+    show(io, eltype(S) <: Number ? S.dv : view(S, diagind(S, IndexStyle(S))))
+    print(io, ", ")
+    show(io, S.ev)
+    print(io, ")")
+end
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
index b75886b8d99fb..472ea53078f87 100644
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/src/uniformscaling.jl
@@ -403,6 +403,16 @@ function copyto!(A::Tridiagonal, J::UniformScaling)
     return A
 end
 
+"""
+    copy!(dest::AbstractMatrix, src::UniformScaling)
+
+Copies a [`UniformScaling`](@ref) onto a matrix.
+
+!!! compat "Julia 1.12"
+    This method is available as of Julia 1.12.
+"""
+Base.copy!(A::AbstractMatrix, J::UniformScaling) = copyto!(A, J)
+
 function cond(J::UniformScaling{T}) where T
     onereal = inv(one(real(J.λ)))
     return J.λ ≠ zero(T) ? onereal : oftype(onereal, Inf)
diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl
index 3fff8289242f7..208fa930e8ee1 100644
--- a/stdlib/LinearAlgebra/test/addmul.jl
+++ b/stdlib/LinearAlgebra/test/addmul.jl
@@ -164,8 +164,7 @@ end
         Bc = Matrix(B)
         returned_mat = mul!(C, A, B, α, β)
         @test returned_mat === C
-        # This test is skipped because it is flakey, but should be fixed and put back (see #49966)
-        @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc  rtol=rtol
+        @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc  rtol=rtol
 
         y = C[:, 1]
         x = B[:, 1]
@@ -190,8 +189,7 @@ end
 
                     returned_mat = mul!(C, Af, Bf, α, β)
                     @test returned_mat === C
-                    # This test is skipped because it is flakey, but should be fixed and put back (see #49966)
-                    @test_skip collect(returned_mat) ≈ α * Ac * Bc + β * Cc  rtol=rtol
+                    @test collect(returned_mat) ≈ α * Ac * Bc + β * Cc  rtol=rtol
                 end
             end
         end
@@ -203,8 +201,7 @@ end
                 Bc = Matrix(B)
                 returned_mat = mul!(C, A, B, α, zero(eltype(C)))
                 @test returned_mat === C
-                # This test is skipped because it is flakey, but should be fixed and put back (see #49966)
-                @test_skip collect(returned_mat) ≈ α * Ac * Bc  rtol=rtol
+                @test collect(returned_mat) ≈ α * Ac * Bc  rtol=rtol
             end
         end
 
@@ -220,4 +217,26 @@ end
     end
 end
 
+@testset "issue #55727" begin
+    C = zeros(1,1)
+    @testset "$(nameof(typeof(A)))" for A in Any[Diagonal([NaN]),
+                Bidiagonal([NaN], Float64[], :U),
+                Bidiagonal([NaN], Float64[], :L),
+                SymTridiagonal([NaN], Float64[]),
+                Tridiagonal(Float64[], [NaN], Float64[]),
+                ]
+        @testset "$(nameof(typeof(B)))" for B in Any[
+                    Diagonal([1.0]),
+                    Bidiagonal([1.0], Float64[], :U),
+                    Bidiagonal([1.0], Float64[], :L),
+                    SymTridiagonal([1.0], Float64[]),
+                    Tridiagonal(Float64[], [1.0], Float64[]),
+                    ]
+            C .= 0
+            @test mul!(C, A, B, 0.0, false)[] === 0.0
+            @test mul!(C, B, A, 0.0, false)[] === 0.0
+        end
+    end
+end
+
 end  # module
diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl
index 1a66c7430723e..6cf2ff9ada09c 100644
--- a/stdlib/LinearAlgebra/test/adjtrans.jl
+++ b/stdlib/LinearAlgebra/test/adjtrans.jl
@@ -532,6 +532,11 @@ end
     @test String(take!(io)) == "transpose(::Matrix{Float64})"
 end
 
+@testset "show" begin
+    @test repr(adjoint([1,2,3])) == "adjoint([1, 2, 3])"
+    @test repr(transpose([1f0,2f0])) == "transpose(Float32[1.0, 2.0])"
+end
+
 @testset "strided transposes" begin
     for t in (Adjoint, Transpose)
         @test strides(t(rand(3))) == (3, 1)
diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl
index 2ff3e9b423702..628e59debe8b7 100644
--- a/stdlib/LinearAlgebra/test/bidiag.jl
+++ b/stdlib/LinearAlgebra/test/bidiag.jl
@@ -124,6 +124,9 @@ Random.seed!(1)
         Bl = Bidiagonal(rand(elty, 10), zeros(elty, 9), 'L')
         @test_throws ArgumentError Bu[5, 4] = 1
         @test_throws ArgumentError Bl[4, 5] = 1
+
+        # setindex should return the destination
+        @test setindex!(ubd, 1, 1, 1) === ubd
     end
 
     @testset "isstored" begin
@@ -143,11 +146,9 @@ Random.seed!(1)
 
     @testset "show" begin
         BD = Bidiagonal(dv, ev, :U)
-        dstring = sprint(Base.print_matrix,BD.dv')
-        estring = sprint(Base.print_matrix,BD.ev')
-        @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n super:$estring"
+        @test sprint(show,BD) == "Bidiagonal($(repr(dv)), $(repr(ev)), :U)"
         BD = Bidiagonal(dv,ev,:L)
-        @test sprint(show,BD) == "$(summary(BD)):\n diag:$dstring\n sub:$estring"
+        @test sprint(show,BD) == "Bidiagonal($(repr(dv)), $(repr(ev)), :L)"
     end
 
     @testset for uplo in (:U, :L)
@@ -828,6 +829,9 @@ end
         end
     end
 
+    @test diag(BU, -1) == [zeros(size(dv[i+1], 1), size(dv[i],2)) for i in 1:length(dv)-1]
+    @test diag(BL, 1) == [zeros(size(dv[i], 1), size(dv[i+1],2)) for i in 1:length(dv)-1]
+
     M = ones(2,2)
     for n in 0:1
         dv = fill(M, n)
@@ -835,6 +839,16 @@ end
         B = Bidiagonal(dv, ev, :U)
         @test B == Matrix{eltype(B)}(B)
     end
+
+    @testset "non-standard axes" begin
+        LinearAlgebra.diagzero(T::Type, ax::Tuple{SizedArrays.SOneTo, Vararg{SizedArrays.SOneTo}}) =
+            zeros(T, ax)
+
+        s = SizedArrays.SizedArray{(2,2)}([1 2; 3 4])
+        B = Bidiagonal(fill(s,4), fill(s,3), :U)
+        @test @inferred(B[2,1]) isa typeof(s)
+        @test all(iszero, B[2,1])
+    end
 end
 
 @testset "copyto!" begin
@@ -944,9 +958,6 @@ end
             @test_throws ArgumentError rmul!(B, A)
             @test_throws ArgumentError lmul!(A, B)
         end
-        D = Diagonal(dv)
-        @test rmul!(copy(A), D) ≈ A * D
-        @test lmul!(D, copy(A)) ≈ D * A
     end
     @testset "non-commutative" begin
         S32 = SizedArrays.SizedArray{(3,2)}(rand(3,2))
@@ -968,6 +979,55 @@ end
     end
 end
 
+@testset "rmul!/lmul! with numbers" begin
+    for T in (Bidiagonal(rand(4), rand(3), :U), Bidiagonal(rand(4), rand(3), :L))
+        @test rmul!(copy(T), 0.2) ≈ rmul!(Array(T), 0.2)
+        @test lmul!(0.2, copy(T)) ≈ lmul!(0.2, Array(T))
+        @test_throws ArgumentError rmul!(T, NaN)
+        @test_throws ArgumentError lmul!(NaN, T)
+    end
+    for T in (Bidiagonal(rand(1), rand(0), :U), Bidiagonal(rand(1), rand(0), :L))
+        @test all(isnan, rmul!(copy(T), NaN))
+        @test all(isnan, lmul!(NaN, copy(T)))
+    end
+end
+
+@testset "mul with Diagonal" begin
+    for n in 0:4
+        dv, ev = rand(n), rand(max(n-1,0))
+        d = rand(n)
+        for uplo in (:U, :L)
+            A = Bidiagonal(dv, ev, uplo)
+            D = Diagonal(d)
+            M = Matrix(A)
+            S = similar(A, size(A))
+            @test A * D ≈ mul!(S, A, D) ≈ M * D
+            @test D * A ≈ mul!(S, D, A) ≈ D * M
+            @test mul!(copy(S), D, A, 2, 2) ≈ D * M * 2 + S * 2
+            @test mul!(copy(S), A, D, 2, 2) ≈ M * D * 2 + S * 2
+
+            A2 = Bidiagonal(dv, zero(ev), uplo)
+            M2 = Array(A2)
+            S2 = Bidiagonal(copy(dv), copy(ev), uplo == (:U) ? (:L) : (:U))
+            MS2 = Array(S2)
+            @test mul!(copy(S2), D, A2) ≈ D * M2
+            @test mul!(copy(S2), A2, D) ≈ M2 * D
+            @test mul!(copy(S2), A2, D, 2, 2) ≈ M2 * D * 2 + MS2 * 2
+            @test mul!(copy(S2), D, A2, 2, 2) ≈ D * M2 * 2 + MS2 * 2
+        end
+    end
+
+    t1 = SizedArrays.SizedArray{(2,3)}([1 2 3; 3 4 5])
+    t2 = SizedArrays.SizedArray{(3,2)}([1 2; 3 4; 5 6])
+    dv, ev, d = fill(t1, 4), fill(2t1, 3), fill(t2, 4)
+    for uplo in (:U, :L)
+        A = Bidiagonal(dv, ev, uplo)
+        D = Diagonal(d)
+        @test A * D ≈ Array(A) * Array(D)
+        @test D * A ≈ Array(D) * Array(A)
+    end
+end
+
 @testset "conversion to Tridiagonal for immutable bands" begin
     n = 4
     dv = FillArrays.Fill(3, n)
@@ -984,4 +1044,98 @@ end
     @test Tridiagonal{Float64}(B) === Tridiagonal(evf, dvf, zf)
 end
 
+@testset "off-band indexing error" begin
+    B = Bidiagonal(Vector{BigInt}(undef, 4), Vector{BigInt}(undef,3), :L)
+    @test_throws "cannot set entry" B[1,2] = 4
+end
+
+@testset "mul with empty arrays" begin
+    A = zeros(5,0)
+    B = Bidiagonal(zeros(0), zeros(0), :U)
+    BL = Bidiagonal(zeros(5), zeros(4), :U)
+    @test size(A * B) == size(A)
+    @test size(BL * A) == size(A)
+    @test size(B * B) == size(B)
+    C = similar(A)
+    @test mul!(C, A, B) == A * B
+    @test mul!(C, BL, A) == BL * A
+    @test mul!(similar(B), B, B) == B * B
+    @test mul!(similar(B, size(B)), B, B) == B * B
+
+    v = zeros(size(B,2))
+    @test size(B * v) == size(v)
+    @test mul!(similar(v), B, v) == B * v
+
+    D = Diagonal(zeros(size(B,2)))
+    @test size(B * D) == size(D * B) == size(D)
+    @test mul!(similar(D), B, D) == mul!(similar(D), D, B) == B * D
+end
+
+@testset "mul for small matrices" begin
+    @testset for n in 0:6
+        D = Diagonal(rand(n))
+        v = rand(n)
+        @testset for uplo in (:L, :U)
+            B = Bidiagonal(rand(n), rand(max(n-1,0)), uplo)
+            M = Matrix(B)
+
+            @test B * v ≈ M * v
+            @test mul!(similar(v), B, v) ≈ M * v
+            @test mul!(ones(size(v)), B, v, 2, 3) ≈ M * v * 2 .+ 3
+
+            @test B * B ≈ M * M
+            @test mul!(similar(B, size(B)), B, B) ≈ M * M
+            @test mul!(ones(size(B)), B, B, 2, 4) ≈ M * M * 2 .+ 4
+
+            for m in 0:6
+                AL = rand(m,n)
+                AR = rand(n,m)
+                @test AL * B ≈ AL * M
+                @test B * AR ≈ M * AR
+                @test mul!(similar(AL), AL, B) ≈ AL * M
+                @test mul!(similar(AR), B, AR) ≈ M * AR
+                @test mul!(ones(size(AL)), AL, B, 2, 4) ≈ AL * M * 2 .+ 4
+                @test mul!(ones(size(AR)), B, AR, 2, 4) ≈ M * AR * 2 .+ 4
+            end
+
+            @test B * D ≈ M * D
+            @test D * B ≈ D * M
+            @test mul!(similar(B), B, D) ≈ M * D
+            @test mul!(similar(B), B, D) ≈ M * D
+            @test mul!(similar(B, size(B)), D, B) ≈ D * M
+            @test mul!(similar(B, size(B)), B, D) ≈ M * D
+            @test mul!(ones(size(B)), D, B, 2, 4) ≈ D * M * 2 .+ 4
+            @test mul!(ones(size(B)), B, D, 2, 4) ≈ M * D * 2 .+ 4
+        end
+        BL = Bidiagonal(rand(n), rand(max(0, n-1)), :L)
+        ML = Matrix(BL)
+        BU = Bidiagonal(rand(n), rand(max(0, n-1)), :U)
+        MU = Matrix(BU)
+        T = Tridiagonal(zeros(max(0, n-1)), zeros(n), zeros(max(0, n-1)))
+        @test mul!(T, BL, BU) ≈ ML * MU
+        @test mul!(T, BU, BL) ≈ MU * ML
+        T = Tridiagonal(ones(max(0, n-1)), ones(n), ones(max(0, n-1)))
+        @test mul!(copy(T), BL, BU, 2, 3) ≈ ML * MU * 2 + T * 3
+        @test mul!(copy(T), BU, BL, 2, 3) ≈ MU * ML * 2 + T * 3
+    end
+
+    n = 4
+    arr = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2))
+    for B in (
+            Bidiagonal(fill(arr,n), fill(arr,n-1), :L),
+            Bidiagonal(fill(arr,n), fill(arr,n-1), :U),
+            )
+        @test B * B ≈ Matrix(B) * Matrix(B)
+        BL = Bidiagonal(fill(arr,n), fill(arr,n-1), :L)
+        BU = Bidiagonal(fill(arr,n), fill(arr,n-1), :U)
+        @test BL * B ≈ Matrix(BL) * Matrix(B)
+        @test BU * B ≈ Matrix(BU) * Matrix(B)
+        @test B * BL ≈ Matrix(B) * Matrix(BL)
+        @test B * BU ≈ Matrix(B) * Matrix(BU)
+        D = Diagonal(fill(arr,n))
+        @test D * B ≈ Matrix(D) * Matrix(B)
+        @test B * D ≈ Matrix(B) * Matrix(D)
+    end
+end
+
 end # module TestBidiagonal
diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl
index 2bcc6208c12df..00bfc18a21638 100644
--- a/stdlib/LinearAlgebra/test/cholesky.jl
+++ b/stdlib/LinearAlgebra/test/cholesky.jl
@@ -630,4 +630,14 @@ end
     end
 end
 
+@testset "cholesky_of_cholesky" begin
+    for T in (Float64, ComplexF64), uplo in (:U, :L)
+        A = randn(T, 100, 100)
+        P = Hermitian(A' * A, uplo)
+        C = cholesky(P)
+        CC = cholesky(C)
+        @test C == CC
+    end
+end
+
 end # module TestCholesky
diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl
index afc1df817a544..1d43d76899392 100644
--- a/stdlib/LinearAlgebra/test/dense.jl
+++ b/stdlib/LinearAlgebra/test/dense.jl
@@ -1285,4 +1285,20 @@ end
     @test eltype(A) == eltype(T)
 end
 
+@testset "tr" begin
+    @testset "block matrices" begin
+        S = [1 2; 3 4]
+        M = fill(S, 3, 3)
+        @test tr(M) == 3S
+        @test tr(view(M, :, :)) == 3S
+        @test tr(view(M, axes(M)...)) == 3S
+    end
+    @testset "avoid promotion" begin
+        A = Int8[1 3; 2 4]
+        @test tr(A) === Int8(5)
+        @test tr(view(A, :, :)) === Int8(5)
+        @test tr(view(A, axes(A)...)) === Int8(5)
+    end
+end
+
 end # module TestDense
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
index 1a3b8d4fd0ea7..85fe963e3592b 100644
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ b/stdlib/LinearAlgebra/test/diagonal.jl
@@ -617,6 +617,8 @@ end
             @test_throws ArgumentError D[i, j] = 1
         end
     end
+    # setindex should return the destination
+    @test setindex!(D, 1, 1, 1) === D
 end
 
 @testset "Test reverse" begin
@@ -779,6 +781,9 @@ end
     @test transpose(Dherm) == Diagonal([[1 1-im; 1+im 1], [1 1-im; 1+im 1]])
     @test adjoint(Dsym) == Diagonal([[1 1-im; 1-im 1], [1 1-im; 1-im 1]])
     @test transpose(Dsym) == Dsym
+    @test diag(D, 0) == diag(D) == [[1 2; 3 4], [1 2; 3 4]]
+    @test diag(D, 1) == diag(D, -1) == [zeros(Int,2,2)]
+    @test diag(D, 2) == diag(D, -2) == []
 
     v = [[1, 2], [3, 4]]
     @test Dherm' * v == Dherm * v
@@ -810,6 +815,13 @@ end
     D = Diagonal(fill(S,3))
     @test D * fill(S,2,3)' == fill(S * S', 3, 2)
     @test fill(S,3,2)' * D == fill(S' * S, 2, 3)
+
+    @testset "indexing with non-standard-axes" begin
+        s = SizedArrays.SizedArray{(2,2)}([1 2; 3 4])
+        D = Diagonal(fill(s,3))
+        @test @inferred(D[1,2]) isa typeof(s)
+        @test all(iszero, D[1,2])
+    end
 end
 
 @testset "Eigensystem for block diagonal (issue #30681)" begin
@@ -1231,6 +1243,11 @@ Base.size(::SMatrix1) = (1, 1)
     @test C isa Matrix{SMatrix1{String}}
 end
 
+@testset "show" begin
+    @test repr(Diagonal([1,2])) == "Diagonal([1, 2])"  # 2-arg show
+    @test contains(repr(MIME"text/plain"(), Diagonal([1,2])), "⋅  2")  # 3-arg show
+end
+
 @testset "copyto! with UniformScaling" begin
     @testset "Fill" begin
         for len in (4, InfiniteArrays.Infinity())
@@ -1255,6 +1272,17 @@ end
     @test *(Diagonal(ones(n)), Diagonal(1:n), Diagonal(ones(n)), Diagonal(1:n)) isa Diagonal
 end
 
+@testset "triple multiplication with a sandwiched BandedMatrix" begin
+    D = Diagonal(StepRangeLen(NaN, 0, 4));
+    B = Bidiagonal(1:4, 1:3, :U)
+    C = D * B * D
+    @test iszero(diag(C, 2))
+    # test associativity
+    C1 = (D * B) * D
+    C2 = D * (B * D)
+    @test diag(C,2) == diag(C1,2) == diag(C2,2)
+end
+
 @testset "diagind" begin
     D = Diagonal(1:4)
     M = Matrix(D)
@@ -1335,4 +1363,32 @@ end
     end
 end
 
+@testset "rmul!/lmul! with numbers" begin
+    D = Diagonal(rand(4))
+    @test rmul!(copy(D), 0.2) ≈ rmul!(Array(D), 0.2)
+    @test lmul!(0.2, copy(D)) ≈ lmul!(0.2, Array(D))
+    @test_throws ArgumentError rmul!(D, NaN)
+    @test_throws ArgumentError lmul!(NaN, D)
+    D = Diagonal(rand(1))
+    @test all(isnan, rmul!(copy(D), NaN))
+    @test all(isnan, lmul!(NaN, copy(D)))
+end
+
+@testset "+/- with block Symmetric/Hermitian" begin
+    for p in ([1 2; 3 4], [1 2+im; 2-im 4+2im])
+        m = SizedArrays.SizedArray{(2,2)}(p)
+        D = Diagonal(fill(m, 2))
+        for T in (Symmetric, Hermitian)
+            S = T(fill(m, 2, 2))
+            @test D + S == Array(D) + Array(S)
+            @test S + D == Array(S) + Array(D)
+        end
+    end
+end
+
+@testset "bounds-check with CartesianIndex ranges" begin
+    D = Diagonal(1:typemax(Int))
+    @test checkbounds(Bool, D, diagind(D, IndexCartesian()))
+end
+
 end # module TestDiagonal
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
index 767f40aa1e53f..54dbb70aa2065 100644
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ b/stdlib/LinearAlgebra/test/hessenberg.jl
@@ -272,4 +272,11 @@ end
     @test S[1,2] == S[Int8(1),UInt16(2)] == S[big(1), Int16(2)]
 end
 
+@testset "complex Symmetric" begin
+    D = diagm(0=>ComplexF64[1,2])
+    S = Symmetric(D)
+    H = hessenberg(S)
+    @test H.H == D
+end
+
 end # module TestHessenberg
diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl
index fd14dad4634a8..f05d7d99c2437 100644
--- a/stdlib/LinearAlgebra/test/lapack.jl
+++ b/stdlib/LinearAlgebra/test/lapack.jl
@@ -889,4 +889,14 @@ end
     @test UpperTriangular(A) == UpperTriangular(B)
 end
 
+@testset "inference in syev!/syevd!" begin
+    for T in (Float32, Float64), CT in (T, Complex{T})
+        A = rand(CT, 4,4)
+        @inferred (A -> LAPACK.syev!('N', 'U', A))(A)
+        @inferred (A -> LAPACK.syev!('V', 'U', A))(A)
+        @inferred (A -> LAPACK.syevd!('N', 'U', A))(A)
+        @inferred (A -> LAPACK.syevd!('V', 'U', A))(A)
+    end
+end
+
 end # module TestLAPACK
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
index 56834a39a3ceb..4c79451ebfc8b 100644
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ b/stdlib/LinearAlgebra/test/matmul.jl
@@ -1120,4 +1120,14 @@ end
     end
 end
 
+@testset "vector-matrix multiplication" begin
+    a = [1,2]
+    A = reshape([1,2], 2, 1)
+    B = [1 2]
+    @test a * B ≈ A * B
+    B = reshape([1,2], 2, 1)
+    @test a * B' ≈ A * B'
+    @test a * transpose(B) ≈ A * transpose(B)
+end
+
 end # module TestMatmul
diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl
index 2f870373c9586..4b91bcfc1a4d5 100644
--- a/stdlib/LinearAlgebra/test/special.jl
+++ b/stdlib/LinearAlgebra/test/special.jl
@@ -5,6 +5,10 @@ module TestSpecial
 using Test, LinearAlgebra, Random
 using LinearAlgebra: rmul!, BandIndex
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl"))
+using .Main.SizedArrays
+
 n= 10 #Size of matrix to test
 Random.seed!(1)
 
@@ -555,8 +559,8 @@ end
     @testset "from Diagonal" begin
         D = Diagonal(d)
         @testset "to Bidiagonal" begin
-            BU = Bidiagonal(zero(d), oneunit.(du), :U)
-            BL = Bidiagonal(zero(d), oneunit.(dl), :L)
+            BU = Bidiagonal(similar(d, BigInt), similar(du, BigInt), :U)
+            BL = Bidiagonal(similar(d, BigInt), similar(dl, BigInt), :L)
             for B in (BL, BU)
                 copyto!(B, D)
                 @test B == D
@@ -573,7 +577,7 @@ end
             end
         end
         @testset "to Tridiagonal" begin
-            T = Tridiagonal(oneunit.(dl), zero(d), oneunit.(du))
+            T = Tridiagonal(similar(dl, BigInt), similar(d, BigInt), similar(du, BigInt))
             copyto!(T, D)
             @test T == D
 
@@ -586,8 +590,8 @@ end
             end
         end
         @testset "to SymTridiagonal" begin
-            for du2 in (oneunit.(du), oneunit.(d))
-                S = SymTridiagonal(zero(d), du2)
+            for du2 in (similar(du, BigInt), similar(d, BigInt))
+                S = SymTridiagonal(similar(d), du2)
                 copyto!(S, D)
                 @test S == D
             end
@@ -630,13 +634,14 @@ end
             end
         end
         @testset "to Tridiagonal" begin
-            T = Tridiagonal(oneunit.(dl), zero(d), oneunit.(du))
+            T = Tridiagonal(similar(dl, BigInt), similar(d, BigInt), similar(du, BigInt))
             for B in (BL, BU, BLones, BUones)
                 copyto!(T, B)
                 @test T == B
             end
 
             @testset "mismatched size" begin
+                T = Tridiagonal(oneunit.(dl), zero(d), oneunit.(du))
                 for uplo in (:L, :U)
                     T .= 0
                     copyto!(T, Bidiagonal([1], Int[], uplo))
@@ -647,8 +652,8 @@ end
             end
         end
         @testset "to SymTridiagonal" begin
-            for du2 in (oneunit.(du), oneunit.(d))
-                S = SymTridiagonal(zero(d), du2)
+            for du2 in (similar(du, BigInt), similar(d, BigInt))
+                S = SymTridiagonal(similar(d, BigInt), du2)
                 for B in (BL, BU)
                     copyto!(S, B)
                     @test S == B
@@ -785,4 +790,73 @@ end
     end
 end
 
+@testset "Partly filled Hermitian and Diagonal algebra" begin
+    D = Diagonal([1,2])
+    for S in (Symmetric, Hermitian), uplo in (:U, :L)
+        M = Matrix{BigInt}(undef, 2, 2)
+        M[1,1] = M[2,2] = M[1+(uplo == :L), 1 + (uplo == :U)] = 3
+        H = S(M, uplo)
+        HM = Matrix(H)
+        @test H + D == D + H == HM + D
+        @test H - D == HM - D
+        @test D - H == D - HM
+    end
+end
+
+@testset "block SymTridiagonal" begin
+    m = SizedArrays.SizedArray{(2,2)}(reshape([1:4;;],2,2))
+    S = SymTridiagonal(fill(m,4), fill(m,3))
+    SA = Array(S)
+    D = Diagonal(fill(m,4))
+    DA = Array(D)
+    BU = Bidiagonal(fill(m,4), fill(m,3), :U)
+    BUA = Array(BU)
+    BL = Bidiagonal(fill(m,4), fill(m,3), :L)
+    BLA = Array(BL)
+    T = Tridiagonal(fill(m,3), fill(m,4), fill(m,3))
+    TA = Array(T)
+    IA = Array(Diagonal(fill(one(m), 4)))
+    @test S + D == D + S == SA + DA
+    @test S - D == -(D - S) == SA - DA
+    @test S + BU == SA + BUA
+    @test S - BU == -(BU - S) == SA - BUA
+    @test S + BL == SA + BLA
+    @test S - BL == -(BL - S) == SA - BLA
+    @test S + T == SA + TA
+    @test S - T == -(T - S) == SA - TA
+    @test S + S == SA + SA
+    @test S - S == -(S - S) == SA - SA
+    @test S + I == I + S == SA + IA
+    @test S - I == -(I - S) == SA - IA
+
+    @test S == S
+    @test S != D
+    @test S != BL
+    @test S != BU
+    @test S != T
+
+    @test_throws ArgumentError fill!(S, m)
+    S_small = SymTridiagonal(fill(m,2), fill(m,1))
+    @test_throws "cannot fill a SymTridiagonal with an asymmetric value" fill!(S, m)
+    fill!(S_small, Symmetric(m))
+    @test all(==(Symmetric(m)), S_small)
+
+    @testset "diag" begin
+        m = SizedArrays.SizedArray{(2,2)}([1 3; 3 4])
+        D = Diagonal(fill(m,4))
+        z = fill(zero(m),3)
+        d = fill(m,4)
+        BU = Bidiagonal(d, z, :U)
+        BL = Bidiagonal(d, z, :L)
+        T = Tridiagonal(z, d, z)
+        for ev in (fill(zero(m),3), fill(zero(m),4))
+            SD = SymTridiagonal(fill(m,4), ev)
+            @test SD == D == SD
+            @test SD == BU == SD
+            @test SD == BL == SD
+            @test SD == T == SD
+        end
+    end
+end
+
 end # module TestSpecial
diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl
index 414b4ad5dd722..7a51ab9d454af 100644
--- a/stdlib/LinearAlgebra/test/symmetric.jl
+++ b/stdlib/LinearAlgebra/test/symmetric.jl
@@ -223,8 +223,8 @@ end
 
         @testset "linalg unary ops" begin
             @testset "tr" begin
-                @test tr(asym) == tr(Symmetric(asym))
-                @test tr(aherm) == tr(Hermitian(aherm))
+                @test tr(asym) ≈ tr(Symmetric(asym))
+                @test tr(aherm) ≈ tr(Hermitian(aherm))
             end
 
             @testset "isposdef[!]" begin
@@ -1116,4 +1116,48 @@ end
     end
 end
 
+@testset "tr for block matrices" begin
+    m = [1 2; 3 4]
+    for b in (m, m * (1 + im))
+        M = fill(b, 3, 3)
+        for ST in (Symmetric, Hermitian)
+            S = ST(M)
+            @test tr(S) == sum(diag(S))
+        end
+    end
+end
+
+@testset "setindex! returns the destination" begin
+    M = rand(2,2)
+    for T in (Symmetric, Hermitian)
+        S = T(M)
+        @test setindex!(S, 0, 2, 2) === S
+    end
+end
+
+@testset "partly iniitalized matrices" begin
+    a = Matrix{BigFloat}(undef, 2,2)
+    a[1] = 1; a[3] = 1; a[4] = 1
+    h = Hermitian(a)
+    s = Symmetric(a)
+    d = Diagonal([1,1])
+    symT = SymTridiagonal([1 1;1 1])
+    @test h+d == Array(h) + Array(d)
+    @test h+symT == Array(h) + Array(symT)
+    @test s+d == Array(s) + Array(d)
+    @test s+symT == Array(s) + Array(symT)
+    @test h-d == Array(h) - Array(d)
+    @test h-symT == Array(h) - Array(symT)
+    @test s-d == Array(s) - Array(d)
+    @test s-symT == Array(s) - Array(symT)
+    @test d+h == Array(d) + Array(h)
+    @test symT+h == Array(symT) + Array(h)
+    @test d+s == Array(d) + Array(s)
+    @test symT+s == Array(symT) + Array(s)
+    @test d-h == Array(d) - Array(h)
+    @test symT-h == Array(symT) - Array(h)
+    @test d-s == Array(d) - Array(s)
+    @test symT-s == Array(symT) - Array(s)
+end
+
 end # module TestSymmetric
diff --git a/stdlib/LinearAlgebra/test/symmetriceigen.jl b/stdlib/LinearAlgebra/test/symmetriceigen.jl
index cacdb72c63071..71087ae4d8d24 100644
--- a/stdlib/LinearAlgebra/test/symmetriceigen.jl
+++ b/stdlib/LinearAlgebra/test/symmetriceigen.jl
@@ -171,6 +171,17 @@ end
     @test D isa Eigen{ComplexF16, Float16, Matrix{ComplexF16}, Vector{Float16}}
     @test D.values ≈ D32.values
     @test D.vectors ≈ D32.vectors
+
+    # ensure that different algorithms dispatch correctly
+    λ, V = eigen(C, LinearAlgebra.QRIteration())
+    @test λ isa Vector{Float16}
+    @test C * V ≈ V * Diagonal(λ)
+end
+
+@testset "complex Symmetric" begin
+    S = Symmetric(rand(ComplexF64,2,2))
+    λ, v = eigen(S)
+    @test S * v ≈ v * Diagonal(λ)
 end
 
 end # module TestSymmetricEigen
diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl
index 5ee8143e3f4bb..ec9a3079e2643 100644
--- a/stdlib/LinearAlgebra/test/triangular.jl
+++ b/stdlib/LinearAlgebra/test/triangular.jl
@@ -6,7 +6,7 @@ debug = false
 using Test, LinearAlgebra, Random
 using LinearAlgebra: BlasFloat, errorbounds, full!, transpose!,
     UnitUpperTriangular, UnitLowerTriangular,
-    mul!, rdiv!, rmul!, lmul!
+    mul!, rdiv!, rmul!, lmul!, BandIndex
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 
@@ -25,6 +25,12 @@ debug && println("Test basic type functionality")
 @test_throws DimensionMismatch LowerTriangular(randn(5, 4))
 @test LowerTriangular(randn(3, 3)) |> t -> [size(t, i) for i = 1:3] == [size(Matrix(t), i) for i = 1:3]
 
+struct MyTriangular{T, A<:LinearAlgebra.AbstractTriangular{T}} <: LinearAlgebra.AbstractTriangular{T}
+    data :: A
+end
+Base.size(A::MyTriangular) = size(A.data)
+Base.getindex(A::MyTriangular, i::Int, j::Int) = A.data[i,j]
+
 # The following test block tries to call all methods in base/linalg/triangular.jl in order for a combination of input element types. Keep the ordering when adding code.
 @testset for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
     # Begin loop for first Triangular matrix
@@ -436,8 +442,6 @@ debug && println("Test basic type functionality")
 
             debug && println("elty1: $elty1, A1: $t1, B: $eltyB")
 
-            Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
-            @test lmul!(Tri,copy(A1)) ≈ Tri*M1
             Tri = Tridiagonal(rand(eltyB,n-1),rand(eltyB,n),rand(eltyB,n-1))
             C = Matrix{promote_type(elty1,eltyB)}(undef, n, n)
             mul!(C, Tri, A1)
@@ -897,7 +901,7 @@ end
     function test_one_oneunit_triangular(a)
         b = Matrix(a)
         @test (@inferred a^1) == b^1
-        @test (@inferred a^-1) == b^-1
+        @test (@inferred a^-1) ≈ b^-1
         @test one(a) == one(b)
         @test one(a)*a == a
         @test a*one(a) == a
@@ -1044,6 +1048,9 @@ end
             @test 2\L == 2\B
             @test real(L) == real(B)
             @test imag(L) == imag(B)
+            if MT == LowerTriangular
+                @test isa(kron(L,L), MT)
+            end
             @test kron(L,L) == kron(B,B)
             @test transpose!(MT(copy(A))) == transpose(L) broken=!(A isa Matrix)
             @test adjoint!(MT(copy(A))) == adjoint(L) broken=!(A isa Matrix)
@@ -1066,6 +1073,9 @@ end
             @test 2\U == 2\B
             @test real(U) == real(B)
             @test imag(U) == imag(B)
+            if MT == UpperTriangular
+                @test isa(kron(U,U), MT)
+            end
             @test kron(U,U) == kron(B,B)
             @test transpose!(MT(copy(A))) == transpose(U) broken=!(A isa Matrix)
             @test adjoint!(MT(copy(A))) == adjoint(U) broken=!(A isa Matrix)
@@ -1077,10 +1087,20 @@ end
     for T in (UpperTriangular, LowerTriangular)
         t = T(fill(ones(2,2), 2, 2))
         m = Matrix(t)
+        @test isa(kron(t,t), T)
         @test kron(t, t) ≈ kron(m, m)
     end
 end
 
+@testset "kron with triangular matrices of mixed eltypes" begin
+    for T in (UpperTriangular, LowerTriangular)
+        U = T(Matrix{Union{Missing,Int}}(fill(2, 2, 2)))
+        U[1, 1] = missing
+        @test kron(U, U)[2, 3] == 0
+        @test kron(U, U)[3, 2] == 0
+    end
+end
+
 @testset "copyto! tests" begin
     @testset "copyto! with aliasing (#39460)" begin
         M = Matrix(reshape(1:36, 6, 6))
@@ -1180,4 +1200,126 @@ end
     @test V == Diagonal([1, 1])
 end
 
+@testset "preserve structure in scaling by NaN" begin
+    M = rand(Int8,2,2)
+    for (Ts, TD) in (((UpperTriangular, UnitUpperTriangular), UpperTriangular),
+                    ((LowerTriangular, UnitLowerTriangular), LowerTriangular))
+        for T in Ts
+            U = T(M)
+            for V in (U * NaN, NaN * U, U / NaN, NaN \ U)
+                @test V isa TD{Float64, Matrix{Float64}}
+                @test all(isnan, diag(V))
+            end
+        end
+    end
+end
+
+@testset "eigvecs for AbstractTriangular" begin
+    S = SizedArrays.SizedArray{(3,3)}(reshape(1:9,3,3))
+    for T in (UpperTriangular, UnitUpperTriangular,
+                LowerTriangular, UnitLowerTriangular)
+        U = T(S)
+        V = eigvecs(U)
+        λ = eigvals(U)
+        @test U * V ≈ V * Diagonal(λ)
+
+        MU = MyTriangular(U)
+        V = eigvecs(U)
+        λ = eigvals(U)
+        @test MU * V ≈ V * Diagonal(λ)
+    end
+end
+
+@testset "(l/r)mul! and (l/r)div! for generic triangular" begin
+    @testset for T in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
+        M = MyTriangular(T(rand(4,4)))
+        A = rand(4,4)
+        Ac = similar(A)
+        @testset "lmul!" begin
+            Ac .= A
+            lmul!(M, Ac)
+            @test Ac ≈ M * A
+        end
+        @testset "rmul!" begin
+            Ac .= A
+            rmul!(Ac, M)
+            @test Ac ≈ A * M
+        end
+        @testset "ldiv!" begin
+            Ac .= A
+            ldiv!(M, Ac)
+            @test Ac ≈ M \ A
+        end
+        @testset "rdiv!" begin
+            Ac .= A
+            rdiv!(Ac, M)
+            @test Ac ≈ A / M
+        end
+    end
+end
+
+@testset "istriu/istril forwards to parent" begin
+    @testset "$(nameof(typeof(M)))" for M in [Tridiagonal(rand(n-1), rand(n), rand(n-1)),
+                Tridiagonal(zeros(n-1), zeros(n), zeros(n-1)),
+                Diagonal(randn(n)),
+                Diagonal(zeros(n)),
+                ]
+        @testset for TriT in (UpperTriangular, UnitUpperTriangular, LowerTriangular, UnitLowerTriangular)
+            U = TriT(M)
+            A = Array(U)
+            for k in -n:n
+                @test istriu(U, k) == istriu(A, k)
+                @test istril(U, k) == istril(A, k)
+            end
+        end
+    end
+    z = zeros(n,n)
+    @testset for TriT in (UpperTriangular, UnitUpperTriangular, LowerTriangular, UnitLowerTriangular)
+        P = Matrix{BigFloat}(undef, n, n)
+        copytrito!(P, z, TriT <: Union{UpperTriangular, UnitUpperTriangular} ? 'U' : 'L')
+        U = TriT(P)
+        A = Array(U)
+        @testset for k in -n:n
+            @test istriu(U, k) == istriu(A, k)
+            @test istril(U, k) == istril(A, k)
+        end
+    end
+end
+
+@testset "indexing with a BandIndex" begin
+    # these tests should succeed even if the linear index along
+    # the band isn't a constant, or type-inferred at all
+    M = rand(Int,2,2)
+    f(A,j, v::Val{n}) where {n} = Val(A[BandIndex(n,j)])
+    function common_tests(M, ind)
+        j = ind[]
+        @test @inferred(f(UpperTriangular(M), j, Val(-1))) == Val(0)
+        @test @inferred(f(UnitUpperTriangular(M), j, Val(-1))) == Val(0)
+        @test @inferred(f(UnitUpperTriangular(M), j, Val(0))) == Val(1)
+        @test @inferred(f(LowerTriangular(M), j, Val(1))) == Val(0)
+        @test @inferred(f(UnitLowerTriangular(M), j, Val(1))) == Val(0)
+        @test @inferred(f(UnitLowerTriangular(M), j, Val(0))) == Val(1)
+    end
+    common_tests(M, Any[1])
+
+    M = Diagonal([1,2])
+    common_tests(M, Any[1])
+    # extra tests for banded structure of the parent
+    for T in (UpperTriangular, UnitUpperTriangular)
+        @test @inferred(f(T(M), 1, Val(1))) == Val(0)
+    end
+    for T in (LowerTriangular, UnitLowerTriangular)
+        @test @inferred(f(T(M), 1, Val(-1))) == Val(0)
+    end
+
+    M = Tridiagonal([1,2], [1,2,3], [1,2])
+    common_tests(M, Any[1])
+    for T in (UpperTriangular, UnitUpperTriangular)
+        @test @inferred(f(T(M), 1, Val(2))) == Val(0)
+    end
+    for T in (LowerTriangular, UnitLowerTriangular)
+        @test @inferred(f(T(M), 1, Val(-2))) == Val(0)
+    end
+end
+
 end # module TestTriangular
diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl
index fae708c4c8db4..b6e93341b1946 100644
--- a/stdlib/LinearAlgebra/test/tridiag.jl
+++ b/stdlib/LinearAlgebra/test/tridiag.jl
@@ -18,6 +18,9 @@ using .Main.FillArrays
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
+isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl"))
+using .Main.SizedArrays
+
 include("testutils.jl") # test_approx_eq_modphase
 
 #Test equivalence of eigenvectors/singular vectors taking into account possible phase (sign) differences
@@ -132,27 +135,43 @@ end
         @test_throws ArgumentError tril!(SymTridiagonal(d, dl), n)
         @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), -n - 2)
         @test_throws ArgumentError tril!(Tridiagonal(dl, d, du), n)
-        @test tril(SymTridiagonal(d,dl))    == Tridiagonal(dl,d,zerosdl)
-        @test tril(SymTridiagonal(d,dl),1)  == Tridiagonal(dl,d,dl)
-        @test tril(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,zerosd,zerosdl)
-        @test tril(SymTridiagonal(d,dl),-2) == Tridiagonal(zerosdl,zerosd,zerosdl)
-        @test tril(Tridiagonal(dl,d,du))    == Tridiagonal(dl,d,zerosdu)
-        @test tril(Tridiagonal(dl,d,du),1)  == Tridiagonal(dl,d,du)
-        @test tril(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,zerosd,zerosdu)
-        @test tril(Tridiagonal(dl,d,du),-2) == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(tril(SymTridiagonal(d,dl)))    == Tridiagonal(dl,d,zerosdl)
+        @test @inferred(tril(SymTridiagonal(d,dl),1))  == Tridiagonal(dl,d,dl)
+        @test @inferred(tril(SymTridiagonal(d,dl),-1)) == Tridiagonal(dl,zerosd,zerosdl)
+        @test @inferred(tril(SymTridiagonal(d,dl),-2)) == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(tril(Tridiagonal(dl,d,du)))    == Tridiagonal(dl,d,zerosdu)
+        @test @inferred(tril(Tridiagonal(dl,d,du),1))  == Tridiagonal(dl,d,du)
+        @test @inferred(tril(Tridiagonal(dl,d,du),-1)) == Tridiagonal(dl,zerosd,zerosdu)
+        @test @inferred(tril(Tridiagonal(dl,d,du),-2)) == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl))))    == Tridiagonal(dl,d,zerosdl)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl)),1))  == Tridiagonal(dl,d,dl)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl)),-1)) == Tridiagonal(dl,zerosd,zerosdl)
+        @test @inferred(tril!(copy(SymTridiagonal(d,dl)),-2)) == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du))))    == Tridiagonal(dl,d,zerosdu)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du)),1))  == Tridiagonal(dl,d,du)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du)),-1)) == Tridiagonal(dl,zerosd,zerosdu)
+        @test @inferred(tril!(copy(Tridiagonal(dl,d,du)),-2)) == Tridiagonal(zerosdl,zerosd,zerosdu)
 
         @test_throws ArgumentError triu!(SymTridiagonal(d, dl), -n)
         @test_throws ArgumentError triu!(SymTridiagonal(d, dl), n + 2)
         @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), -n)
         @test_throws ArgumentError triu!(Tridiagonal(dl, d, du), n + 2)
-        @test triu(SymTridiagonal(d,dl))    == Tridiagonal(zerosdl,d,dl)
-        @test triu(SymTridiagonal(d,dl),-1) == Tridiagonal(dl,d,dl)
-        @test triu(SymTridiagonal(d,dl),1)  == Tridiagonal(zerosdl,zerosd,dl)
-        @test triu(SymTridiagonal(d,dl),2)  == Tridiagonal(zerosdl,zerosd,zerosdl)
-        @test triu(Tridiagonal(dl,d,du))    == Tridiagonal(zerosdl,d,du)
-        @test triu(Tridiagonal(dl,d,du),-1) == Tridiagonal(dl,d,du)
-        @test triu(Tridiagonal(dl,d,du),1)  == Tridiagonal(zerosdl,zerosd,du)
-        @test triu(Tridiagonal(dl,d,du),2)  == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(triu(SymTridiagonal(d,dl)))    == Tridiagonal(zerosdl,d,dl)
+        @test @inferred(triu(SymTridiagonal(d,dl),-1)) == Tridiagonal(dl,d,dl)
+        @test @inferred(triu(SymTridiagonal(d,dl),1))  == Tridiagonal(zerosdl,zerosd,dl)
+        @test @inferred(triu(SymTridiagonal(d,dl),2))  == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(triu(Tridiagonal(dl,d,du)))    == Tridiagonal(zerosdl,d,du)
+        @test @inferred(triu(Tridiagonal(dl,d,du),-1)) == Tridiagonal(dl,d,du)
+        @test @inferred(triu(Tridiagonal(dl,d,du),1))  == Tridiagonal(zerosdl,zerosd,du)
+        @test @inferred(triu(Tridiagonal(dl,d,du),2))  == Tridiagonal(zerosdl,zerosd,zerosdu)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl))))    == Tridiagonal(zerosdl,d,dl)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl)),-1)) == Tridiagonal(dl,d,dl)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl)),1))  == Tridiagonal(zerosdl,zerosd,dl)
+        @test @inferred(triu!(copy(SymTridiagonal(d,dl)),2))  == Tridiagonal(zerosdl,zerosd,zerosdl)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du))))    == Tridiagonal(zerosdl,d,du)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du)),-1)) == Tridiagonal(dl,d,du)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du)),1))  == Tridiagonal(zerosdl,zerosd,du)
+        @test @inferred(triu!(copy(Tridiagonal(dl,d,du)),2))  == Tridiagonal(zerosdl,zerosd,zerosdu)
 
         @test !istril(SymTridiagonal(d,dl))
         @test istril(SymTridiagonal(d,zerosdl))
@@ -259,6 +278,8 @@ end
                 @test_throws ArgumentError A[3, 2] = 1 # test assignment on the subdiagonal
                 @test_throws ArgumentError A[2, 3] = 1 # test assignment on the superdiagonal
             end
+            # setindex! should return the destination
+            @test setindex!(A, A[2,2], 2, 2) === A
         end
         @testset "diag" begin
             @test (@inferred diag(A))::typeof(d) == d
@@ -471,7 +492,7 @@ end
 end
 
 @testset "SymTridiagonal/Tridiagonal block matrix" begin
-    M = [1 2; 2 4]
+    M = [1 2; 3 4]
     n = 5
     A = SymTridiagonal(fill(M, n), fill(M, n-1))
     @test @inferred A[1,1] == Symmetric(M)
@@ -484,6 +505,12 @@ end
     @test_throws ArgumentError diag(A, 2)
     @test_throws ArgumentError diag(A, n+1)
     @test_throws ArgumentError diag(A, -n-1)
+    A[1,1] = Symmetric(2M)
+    @test A[1,1] == Symmetric(2M)
+    @test_throws ArgumentError A[1,1] = M
+
+    @test tr(A) == sum(diag(A))
+    @test issymmetric(tr(A))
 
     A = Tridiagonal(fill(M, n-1), fill(M, n), fill(M, n-1))
     @test @inferred A[1,1] == M
@@ -805,7 +832,7 @@ end
     @test copyto!(zero(S), T) == T
 
     T2 = Tridiagonal(ones(length(ev)), zero(dv), zero(ev))
-    @test_throws "cannot copy a non-symmetric Tridiagonal matrix to a SymTridiagonal" copyto!(zero(S), T2)
+    @test_throws "cannot copy an asymmetric Tridiagonal matrix to a SymTridiagonal" copyto!(zero(S), T2)
 
     @testset "mismatched sizes" begin
         dv2 = [4; @view dv[2:end]]
@@ -911,4 +938,123 @@ end
     end
 end
 
+@testset "rmul!/lmul! with numbers" begin
+    for T in (SymTridiagonal(rand(4), rand(3)), Tridiagonal(rand(3), rand(4), rand(3)))
+        @test rmul!(copy(T), 0.2) ≈ rmul!(Array(T), 0.2)
+        @test lmul!(0.2, copy(T)) ≈ lmul!(0.2, Array(T))
+        @test_throws ArgumentError rmul!(T, NaN)
+        @test_throws ArgumentError lmul!(NaN, T)
+    end
+    for T in (SymTridiagonal(rand(2), rand(1)), Tridiagonal(rand(1), rand(2), rand(1)))
+        @test all(isnan, rmul!(copy(T), NaN))
+        @test all(isnan, lmul!(NaN, copy(T)))
+    end
+end
+
+@testset "mul with empty arrays" begin
+    A = zeros(5,0)
+    T = Tridiagonal(zeros(0), zeros(0), zeros(0))
+    TL = Tridiagonal(zeros(4), zeros(5), zeros(4))
+    @test size(A * T) == size(A)
+    @test size(TL * A) == size(A)
+    @test size(T * T) == size(T)
+    C = similar(A)
+    @test mul!(C, A, T) == A * T
+    @test mul!(C, TL, A) == TL * A
+    @test mul!(similar(T), T, T) == T * T
+    @test mul!(similar(T, size(T)), T, T) == T * T
+
+    v = zeros(size(T,2))
+    @test size(T * v) == size(v)
+    @test mul!(similar(v), T, v) == T * v
+
+    D = Diagonal(zeros(size(T,2)))
+    @test size(T * D) == size(D * T) == size(D)
+    @test mul!(similar(D), T, D) == mul!(similar(D), D, T) == T * D
+end
+
+@testset "show" begin
+    T = Tridiagonal(1:3, 1:4, 1:3)
+    @test sprint(show, T) == "Tridiagonal(1:3, 1:4, 1:3)"
+    S = SymTridiagonal(1:4, 1:3)
+    @test sprint(show, S) == "SymTridiagonal(1:4, 1:3)"
+
+    m = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2))
+    T = Tridiagonal(fill(m,2), fill(m,3), fill(m,2))
+    @test sprint(show, T) == "Tridiagonal($(repr(diag(T,-1))), $(repr(diag(T))), $(repr(diag(T,1))))"
+    S = SymTridiagonal(fill(m,3), fill(m,2))
+    @test sprint(show, S) == "SymTridiagonal($(repr(diag(S))), $(repr(diag(S,1))))"
+end
+
+@testset "mul for small matrices" begin
+    @testset for n in 0:6
+        for T in (
+                Tridiagonal(rand(max(n-1,0)), rand(n), rand(max(n-1,0))),
+                SymTridiagonal(rand(n), rand(max(n-1,0))),
+                )
+            M = Matrix(T)
+            @test T * T ≈ M * M
+            @test mul!(similar(T, size(T)), T, T) ≈ M * M
+            @test mul!(ones(size(T)), T, T, 2, 4) ≈ M * M * 2 .+ 4
+
+            for m in 0:6
+                AR = rand(n,m)
+                AL = rand(m,n)
+                @test AL * T ≈ AL * M
+                @test T * AR ≈ M * AR
+                @test mul!(similar(AL), AL, T) ≈ AL * M
+                @test mul!(similar(AR), T, AR) ≈ M * AR
+                @test mul!(ones(size(AL)), AL, T, 2, 4) ≈ AL * M * 2 .+ 4
+                @test mul!(ones(size(AR)), T, AR, 2, 4) ≈ M * AR * 2 .+ 4
+            end
+
+            v = rand(n)
+            @test T * v ≈ M * v
+            @test mul!(similar(v), T, v) ≈ M * v
+
+            D = Diagonal(rand(n))
+            @test T * D ≈ M * D
+            @test D * T ≈ D * M
+            @test mul!(Tridiagonal(similar(T)), D, T) ≈ D * M
+            @test mul!(Tridiagonal(similar(T)), T, D) ≈ M * D
+            @test mul!(similar(T, size(T)), D, T) ≈ D * M
+            @test mul!(similar(T, size(T)), T, D) ≈ M * D
+            @test mul!(ones(size(T)), D, T, 2, 4) ≈ D * M * 2 .+ 4
+            @test mul!(ones(size(T)), T, D, 2, 4) ≈ M * D * 2 .+ 4
+
+            for uplo in (:U, :L)
+                B = Bidiagonal(rand(n), rand(max(0, n-1)), uplo)
+                @test T * B ≈ M * B
+                @test B * T ≈ B * M
+                if n <= 2
+                    @test mul!(Tridiagonal(similar(T)), B, T) ≈ B * M
+                    @test mul!(Tridiagonal(similar(T)), T, B) ≈ M * B
+                end
+                @test mul!(similar(T, size(T)), B, T) ≈ B * M
+                @test mul!(similar(T, size(T)), T, B) ≈ M * B
+                @test mul!(ones(size(T)), B, T, 2, 4) ≈ B * M * 2 .+ 4
+                @test mul!(ones(size(T)), T, B, 2, 4) ≈ M * B * 2 .+ 4
+            end
+        end
+    end
+
+    n = 4
+    arr = SizedArrays.SizedArray{(2,2)}(reshape([1:4;],2,2))
+    for T in (
+            SymTridiagonal(fill(arr,n), fill(arr,n-1)),
+            Tridiagonal(fill(arr,n-1), fill(arr,n), fill(arr,n-1)),
+            )
+        @test T * T ≈ Matrix(T) * Matrix(T)
+        BL = Bidiagonal(fill(arr,n), fill(arr,n-1), :L)
+        BU = Bidiagonal(fill(arr,n), fill(arr,n-1), :U)
+        @test BL * T ≈ Matrix(BL) * Matrix(T)
+        @test BU * T ≈ Matrix(BU) * Matrix(T)
+        @test T * BL ≈ Matrix(T) * Matrix(BL)
+        @test T * BU ≈ Matrix(T) * Matrix(BU)
+        D = Diagonal(fill(arr,n))
+        @test D * T ≈ Matrix(D) * Matrix(T)
+        @test T * D ≈ Matrix(T) * Matrix(D)
+    end
+end
+
 end # module TestTridiagonal
diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl
index 92547e8648d8a..d335cd6f63521 100644
--- a/stdlib/LinearAlgebra/test/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/test/uniformscaling.jl
@@ -226,6 +226,13 @@ let
         @test copyto!(B, J) == [λ zero(λ)]
     end
 
+    @testset "copy!" begin
+        A = Matrix{Int}(undef, (3,3))
+        @test copy!(A, I) == one(A)
+        B = Matrix{ComplexF64}(undef, (1,2))
+        @test copy!(B, J) == [λ zero(λ)]
+    end
+
     @testset "binary ops with vectors" begin
         v = complex.(randn(3), randn(3))
         # As shown in #20423@GitHub, vector acts like x1 matrix when participating in linear algebra
diff --git a/stdlib/Manifest.toml b/stdlib/Manifest.toml
index a9f02da6692a6..f9fb307190838 100644
--- a/stdlib/Manifest.toml
+++ b/stdlib/Manifest.toml
@@ -68,12 +68,12 @@ version = "1.11.0"
 [[deps.JuliaSyntaxHighlighting]]
 deps = ["StyledStrings"]
 uuid = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
-version = "1.11.0"
+version = "1.12.0"
 
 [[deps.LLD_jll]]
 deps = ["Artifacts", "Libdl", "Zlib_jll", "libLLVM_jll"]
 uuid = "d55e3150-da41-5e91-b323-ecfd1eec6109"
-version = "16.0.6+4"
+version = "18.1.7+2"
 
 [[deps.LLVMLibUnwind_jll]]
 deps = ["Artifacts", "Libdl"]
@@ -113,12 +113,12 @@ version = "1.11.0+1"
 [[deps.LibUV_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
-version = "2.0.1+16"
+version = "2.0.1+17"
 
 [[deps.LibUnwind_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
-version = "1.8.1+0"
+version = "1.8.1+1"
 
 [[deps.Libdl]]
 uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
@@ -163,7 +163,7 @@ version = "1.2.0"
 [[deps.OpenBLAS_jll]]
 deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.26+2"
+version = "0.3.28+2"
 
 [[deps.OpenLibm_jll]]
 deps = ["Artifacts", "Libdl"]
@@ -190,11 +190,12 @@ uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 version = "1.11.0"
 
 [[deps.Profile]]
+deps = ["StyledStrings"]
 uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 version = "1.11.0"
 
 [[deps.REPL]]
-deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"]
+deps = ["InteractiveUtils", "JuliaSyntaxHighlighting", "Markdown", "Sockets", "StyledStrings", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 version = "1.11.0"
 
@@ -223,7 +224,7 @@ version = "1.11.0"
 [[deps.SparseArrays]]
 deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
 uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-version = "1.11.0"
+version = "1.12.0"
 
 [[deps.Statistics]]
 deps = ["LinearAlgebra"]
@@ -242,7 +243,7 @@ version = "1.11.0"
 [[deps.SuiteSparse_jll]]
 deps = ["Artifacts", "Libdl", "libblastrampoline_jll"]
 uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
-version = "7.7.0+0"
+version = "7.8.0+0"
 
 [[deps.TOML]]
 deps = ["Dates"]
@@ -281,12 +282,12 @@ version = "2.2.5+0"
 [[deps.libLLVM_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
-version = "16.0.6+4"
+version = "18.1.7+2"
 
 [[deps.libblastrampoline_jll]]
 deps = ["Artifacts", "Libdl"]
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "5.8.0+1"
+version = "5.11.0+0"
 
 [[deps.nghttp2_jll]]
 deps = ["Artifacts", "Libdl"]
diff --git a/stdlib/Markdown/Project.toml b/stdlib/Markdown/Project.toml
index e2edcdefea537..a48a3d1f0b345 100644
--- a/stdlib/Markdown/Project.toml
+++ b/stdlib/Markdown/Project.toml
@@ -4,8 +4,8 @@ version = "1.11.0"
 
 [deps]
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
-StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/Markdown/src/Markdown.jl b/stdlib/Markdown/src/Markdown.jl
index b9ff56297fe51..0d45d9e534df2 100644
--- a/stdlib/Markdown/src/Markdown.jl
+++ b/stdlib/Markdown/src/Markdown.jl
@@ -56,7 +56,8 @@ const MARKDOWN_FACES = [
 
 __init__() = foreach(addface!, MARKDOWN_FACES)
 
-parse(markdown::AbstractString; flavor = julia) = parse(IOBuffer(markdown), flavor = flavor)
+parse(markdown::String; flavor = julia) = parse(IOBuffer(markdown), flavor = flavor)
+parse(markdown::AbstractString; flavor = julia) = parse(String(markdown), flavor = flavor)
 parse_file(file::AbstractString; flavor = julia) = parse(read(file, String), flavor = flavor)
 
 function mdexpr(s, flavor = :julia)
@@ -122,4 +123,25 @@ import Base.Docs: catdoc
 
 catdoc(md::MD...) = MD(md...)
 
+if Base.generating_output()
+    # workload to reduce latency
+    md"""
+    # H1
+    ## H2
+    ### H3
+    **bold text**
+    *italicized text*
+    > blockquote
+    1. First item
+    2. Second item
+    3. Third item
+    - First item
+    - Second item
+    - Third item
+    `code`
+    Horizontal Rule
+    ---
+    """
+end
+
 end
diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl
index 009fd2eb3af18..3274483801c77 100644
--- a/stdlib/Markdown/src/render/terminal/formatting.jl
+++ b/stdlib/Markdown/src/render/terminal/formatting.jl
@@ -66,3 +66,17 @@ function wraplines(content::Union{Annot, SubString{<:Annot}}, width::Integer = 8
     end
     lines
 end
+
+# Print horizontal lines between each docstring if there are multiple docs
+function insert_hlines(docs)
+    if !isa(docs, MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
+        return docs
+    end
+    docs = docs::MD
+    v = Any[]
+    for (n, doc) in enumerate(docs.content)
+        push!(v, doc)
+        n == length(docs.content) || push!(v, HorizontalRule())
+    end
+    return MD(v)
+end
diff --git a/stdlib/Markdown/src/render/terminal/render.jl b/stdlib/Markdown/src/render/terminal/render.jl
index 16cf413639b00..a97d273131536 100644
--- a/stdlib/Markdown/src/render/terminal/render.jl
+++ b/stdlib/Markdown/src/render/terminal/render.jl
@@ -13,7 +13,10 @@ function term(io::IO, content::Vector, cols)
     term(io, content[end], cols)
 end
 
-term(io::IO, md::MD, columns = cols(io)) = term(io, md.content, columns)
+function term(io::IO, md::MD, columns = cols(io))
+    md = insert_hlines(md)
+    return term(io, md.content, columns)
+end
 
 function term(io::IO, md::Paragraph, columns)
     lines = wraplines(annotprint(terminline, md.content), columns-2margin)
@@ -113,7 +116,7 @@ function term(io::AnnotIO, md::Header{l}, columns) where l
 end
 
 function term(io::IO, md::Code, columns)
-    code = if md.language ∈ ("", "julia")
+    code = if md.language == "julia"
         highlight(md.code)
     elseif md.language == "julia-repl" || Base.startswith(md.language, "jldoctest")
         hl = AnnotatedString(md.code)
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index a3026683ad1e7..35608f75b2426 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, Markdown, StyledStrings
-import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, plain, term, html, rst, Table, Code, LaTeX, Footnote
+import Markdown: MD, Paragraph, Header, Italic, Bold, LineBreak, insert_hlines, plain, term, html, rst, Table, Code, LaTeX, Footnote
 import Base: show
 
 # Basics
@@ -1301,3 +1301,14 @@ end
 @testset "Docstrings" begin
     @test isempty(Docs.undocumented_names(Markdown))
 end
+
+@testset "Non-Markdown" begin
+    # https://github.com/JuliaLang/julia/issues/37765
+    @test isa(insert_hlines(Text("foo")), Text)
+    # https://github.com/JuliaLang/julia/issues/37757
+    @test insert_hlines(nothing) === nothing
+end
+
+@testset "Lazy Strings" begin
+    @test Markdown.parse(lazy"foo") == Markdown.parse("foo")
+end
diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl
index e6987582bf511..7d57bf053940d 100644
--- a/stdlib/Mmap/src/Mmap.jl
+++ b/stdlib/Mmap/src/Mmap.jl
@@ -86,6 +86,8 @@ grow!(::Anonymous,o::Integer,l::Integer) = return
 function grow!(io::IO, offset::Integer, len::Integer)
     pos = position(io)
     filelen = filesize(io)
+    # If non-regular file skip trying to grow since we know that will fail the ftruncate syscall
+    filelen == 0 && !isfile(io) && return
     if filelen < offset + len
         failure = ccall(:jl_ftruncate, Cint, (Cint, Int64), fd(io), offset+len)
         Base.systemerror(:ftruncate, failure != 0)
@@ -211,14 +213,12 @@ function mmap(io::IO,
     szfile = convert(Csize_t, len + offset)
     requestedSizeLarger = false
     if !(io isa Mmap.Anonymous)
-        @static if !Sys.isapple()
-            requestedSizeLarger = szfile > filesize(io)
-        end
+        requestedSizeLarger = szfile > filesize(io)
     end
     # platform-specific mmapping
     @static if Sys.isunix()
         prot, flags, iswrite = settings(file_desc, shared)
-        if requestedSizeLarger
+        if requestedSizeLarger && isfile(io) # add a condition to this line to ensure it only checks files
             if iswrite
                 if grow
                     grow!(io, offset, len)
@@ -229,9 +229,6 @@ function mmap(io::IO,
                 throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions"))
             end
         end
-        @static if Sys.isapple()
-            iswrite && grow && grow!(io, offset, len)
-        end
         # mmap the file
         ptr = ccall(:jl_mmap, Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t, Cint, Cint, RawFD, Int64),
             C_NULL, mmaplen, prot, flags, file_desc, offset_page)
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
index 95dc40e6a0c2b..a9a1a04facff5 100644
--- a/stdlib/OpenBLAS_jll/Project.toml
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "OpenBLAS_jll"
 uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
-version = "0.3.27+1"
+version = "0.3.28+2"
 
 [deps]
 # See note in `src/OpenBLAS_jll.jl` about this dependency.
diff --git a/stdlib/OpenBLAS_jll/test/runtests.jl b/stdlib/OpenBLAS_jll/test/runtests.jl
index 1d944bab8cd67..76242b2e4080e 100644
--- a/stdlib/OpenBLAS_jll/test/runtests.jl
+++ b/stdlib/OpenBLAS_jll/test/runtests.jl
@@ -13,5 +13,5 @@ else
 end
 
 @testset "OpenBLAS_jll" begin
-    @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) != nothing
+    @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) !== nothing
 end
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index 9d6c9ddaca2ba..34233c58702b4 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,4 +1,4 @@
 PKG_BRANCH = master
-PKG_SHA1 = 046df8ce407659cfaccc647265a6e57bfb02e056
+PKG_SHA1 = 51d4910c114a863d888659cb8962c1e161b2a421
 PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git
 PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index 9b636d1180598..fd38b3ebd3573 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -297,11 +297,11 @@ end
 @inline function rmdynamic(spec::Spec{T}, args, argp) where {T}
     zero, width, precision = spec.zero, spec.width, spec.precision
     if spec.dynamic_width
-        width = args[argp]
+        width = args[argp]::Integer
         argp += 1
     end
     if spec.dynamic_precision
-        precision = args[argp]
+        precision = args[argp]::Integer
         if zero && T <: Ints && precision > 0
             zero = false
         end
@@ -310,12 +310,12 @@ end
     (Spec{T}(spec.leftalign, spec.plus, spec.space, zero, spec.hash, width, precision, false, false), argp)
 end
 
-@inline function fmt(buf, pos, args, argp, spec::Spec{T}) where {T}
+Base.@constprop :aggressive function fmt(buf, pos, args, argp, spec::Spec{T}) where {T}
     spec, argp = rmdynamic(spec, args, argp)
     (fmt(buf, pos, args[argp], spec), argp+1)
 end
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
     leftalign, width = spec.leftalign, spec.width
     c = Char(first(arg))
     w = textwidth(c)
@@ -336,7 +336,7 @@ end
 end
 
 # strings
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
     leftalign, hash, width, prec = spec.leftalign, spec.hash, spec.width, spec.precision
     str = string(arg)
     slen = textwidth(str)::Int + (hash ? arg isa AbstractString ? 2 : 1 : 0)
@@ -383,7 +383,7 @@ toint(x::Rational) = Integer(x)
 fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
     fmt(buf, pos, arg, floatfmt(spec))
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Ints}
     leftalign, plus, space, zero, hash, width, prec =
         spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
     bs = base(T)
@@ -497,7 +497,7 @@ _snprintf(ptr, siz, str, arg) =
 # seems like a dangerous thing to do.
 const __BIG_FLOAT_MAX__ = 8192
 
-@inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
+function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
     leftalign, plus, space, zero, hash, width, prec =
         spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
     x = tofloat(arg)
@@ -931,7 +931,8 @@ for more details on C `printf` support.
 """
 function format end
 
-function format(io::IO, f::Format, args...) # => Nothing
+# Since it will specialize on `f`, which has a Tuple-type often of length(args), we might as well specialize on `args` too.
+function format(io::IO, f::Format, args::Vararg{Any,N}) where N # => Nothing
     f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
@@ -939,7 +940,7 @@ function format(io::IO, f::Format, args...) # => Nothing
     return
 end
 
-function format(f::Format, args...) # => String
+function format(f::Format, args::Vararg{Any,N}) where N # => String
     f.numarguments == length(args) || argmismatch(f.numarguments, length(args))
     buf = Base.StringVector(computelen(f.substringranges, f.formats, args))
     pos = format(buf, 1, f, args...)
diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml
index ad0107ecf9404..13cd11f70d9b4 100644
--- a/stdlib/Profile/Project.toml
+++ b/stdlib/Profile/Project.toml
@@ -2,6 +2,12 @@ name = "Profile"
 uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 version = "1.11.0"
 
+[deps]
+StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
+
+[compat]
+StyledStrings = "1.11.0"
+
 [extras]
 Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md
index 5b4db77b9cb16..0b358e5decfa9 100644
--- a/stdlib/Profile/docs/src/index.md
+++ b/stdlib/Profile/docs/src/index.md
@@ -155,3 +155,8 @@ julia> Profile.HeapSnapshot.assemble_snapshot("snapshot", "snapshot.heapsnapshot
 
 The resulting heap snapshot file can be uploaded to chrome devtools to be viewed.
 For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots).
+An alternative for analyzing Chromium heap snapshots is with the VS Code extension
+`ms-vscode.vscode-js-profile-flame`.
+
+The Firefox heap snapshots are of a different format, and Firefox currently may
+*not* be used for viewing the heap snapshots generated by Julia.
diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl
index 31d703a151ad8..9d0b18cb468ca 100644
--- a/stdlib/Profile/src/Allocs.jl
+++ b/stdlib/Profile/src/Allocs.jl
@@ -321,7 +321,7 @@ end
 function flat(io::IO, data::Vector{Alloc}, cols::Int, fmt::ProfileFormat)
     fmt.combine || error(ArgumentError("combine=false"))
     lilist, n, m, totalbytes = parse_flat(fmt.combine ? StackFrame : UInt64, data, fmt.C)
-    filenamemap = Dict{Symbol,String}()
+    filenamemap = Profile.FileNameMap()
     if isempty(lilist)
         warning_empty()
         return true
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index 062b608b25c59..b753c9ca88f24 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -7,7 +7,7 @@ Profiling support.
 
 ## CPU profiling
 - `@profile foo()` to profile a specific call.
-- `Profile.print()` to print the report.
+- `Profile.print()` to print the report. Paths are clickable links in supported terminals and specialized for JULIA_EDITOR etc.
 - `Profile.clear()` to clear the buffer.
 - Send a $(Sys.isbsd() ? "SIGINFO (ctrl-t)" : "SIGUSR1") signal to the process to automatically trigger a profile and print.
 
@@ -38,6 +38,8 @@ public clear,
     Allocs
 
 import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
+import Base: AnnotatedString
+using StyledStrings: @styled_str
 
 const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
 
@@ -63,10 +65,10 @@ end
 
 # An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
 function _peek_report()
-    iob = IOBuffer()
+    iob = Base.AnnotatedIOBuffer()
     ioc = IOContext(IOContext(iob, stderr), :displaysize=>displaysize(stderr))
     print(ioc, groupby = [:thread, :task])
-    Base.print(stderr, String(take!(iob)))
+    Base.print(stderr, read(seekstart(iob), AnnotatedString))
 end
 # This is a ref so that it can be overridden by other profile info consumers.
 const peek_report = Ref{Function}(_peek_report)
@@ -196,7 +198,9 @@ const META_OFFSET_THREADID = 5
 
 Prints profiling results to `io` (by default, `stdout`). If you do not
 supply a `data` vector, the internal buffer of accumulated backtraces
-will be used.
+will be used. Paths are clickable links in supported terminals and
+specialized for [`JULIA_EDITOR`](@ref) with line numbers, or just file
+links if no editor is set.
 
 The keyword arguments can be any combination of:
 
@@ -266,7 +270,7 @@ function print(io::IO,
         end
         any_nosamples = true
         if format === :tree
-            Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
+            Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
             Base.print(io, "=========================================================\n")
         end
         if groupby == [:task, :thread]
@@ -280,7 +284,7 @@ function print(io::IO,
                     nl = length(threadids) > 1 ? "\n" : ""
                     printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
                     for threadid in threadids
-                        printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
+                        printstyled(io, " Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
                         nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
                         nosamples && (any_nosamples = true)
                         println(io)
@@ -296,7 +300,7 @@ function print(io::IO,
                     any_nosamples = true
                 else
                     nl = length(taskids) > 1 ? "\n" : ""
-                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
+                    printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid)))$nl"; bold=true, color=Base.info_color())
                     for taskid in taskids
                         printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
                         nosamples = print_group(io, data, lidict, pf, format, threadid, taskid, true)
@@ -320,7 +324,7 @@ function print(io::IO,
             threadids = intersect(get_thread_ids(data), threads)
             isempty(threadids) && (any_nosamples = true)
             for threadid in threadids
-                printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
+                printstyled(io, "Thread $threadid ($(Threads.threadpooldescription(threadid))) "; bold=true, color=Base.info_color())
                 nosamples = print_group(io, data, lidict, pf, format, threadid, tasks, true)
                 nosamples && (any_nosamples = true)
                 println(io)
@@ -501,12 +505,23 @@ function flatten(data::Vector, lidict::LineInfoDict)
     return (newdata, newdict)
 end
 
+const SRC_DIR = normpath(joinpath(Sys.BUILD_ROOT_PATH, "src"))
+
 # Take a file-system path and try to form a concise representation of it
 # based on the package ecosystem
-function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
+function short_path(spath::Symbol, filenamecache::Dict{Symbol, Tuple{String,String,String}})
     return get!(filenamecache, spath) do
-        path = string(spath)
-        if isabspath(path)
+        path = Base.fixup_stdlib_path(string(spath))
+        path_norm = normpath(path)
+        possible_base_path = normpath(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path))
+        lib_dir = abspath(Sys.BINDIR, Base.LIBDIR)
+        if startswith(path_norm, SRC_DIR)
+            remainder = only(split(path_norm, SRC_DIR, keepempty=false))
+            return (isfile(path_norm) ? path_norm : ""), "@juliasrc", remainder
+        elseif startswith(path_norm, lib_dir)
+            remainder = only(split(path_norm, lib_dir, keepempty=false))
+            return (isfile(path_norm) ? path_norm : ""), "@julialib", remainder
+        elseif isabspath(path)
             if ispath(path)
                 # try to replace the file-system prefix with a short "@Module" one,
                 # assuming that profile came from the current machine
@@ -522,20 +537,21 @@ function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
                             pkgid = Base.project_file_name_uuid(project_file, "")
                             isempty(pkgid.name) && return path # bad Project file
                             # return the joined the module name prefix and path suffix
-                            path = path[nextind(path, sizeof(root)):end]
-                            return string("@", pkgid.name, path)
+                            _short_path = path[nextind(path, sizeof(root)):end]
+                            return path, string("@", pkgid.name), _short_path
                         end
                     end
                 end
             end
-            return path
-        elseif isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path))
+            return path, "", path
+        elseif isfile(possible_base_path)
             # do the same mechanic for Base (or Core/Compiler) files as above,
             # but they start from a relative path
-            return joinpath("@Base", normpath(path))
+            return possible_base_path, "@Base", normpath(path)
         else
             # for non-existent relative paths (such as "REPL[1]"), just consider simplifying them
-            return normpath(path) # drop leading "./"
+            path = normpath(path)
+            return "", "", path # drop leading "./"
         end
     end
 end
@@ -678,10 +694,10 @@ function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
     !isempty(data) && has_meta(data) && error("input already has metadata")
     cpu_clock_cycle = UInt64(99)
     data_with_meta = similar(data, 0)
-    for i = 1:length(data)
+    for i in eachindex(data)
         val = data[i]
         if iszero(val)
-            # (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+            # META_OFFSET_THREADID, META_OFFSET_TASKID, META_OFFSET_CPUCYCLECLOCK, META_OFFSET_SLEEPSTATE
             push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0)
         else
             push!(data_with_meta, val)
@@ -756,6 +772,8 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
     return (lilist, n, m, totalshots, nsleeping)
 end
 
+const FileNameMap = Dict{Symbol,Tuple{String,String,String}}
+
 function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
                 threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
     lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
@@ -766,7 +784,7 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
         m = m[keep]
     end
     util_perc = (1 - (nsleeping / totalshots)) * 100
-    filenamemap = Dict{Symbol,String}()
+    filenamemap = FileNameMap()
     if isempty(lilist)
         if is_subsection
             Base.print(io, "Total snapshots: ")
@@ -788,9 +806,43 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
     return false
 end
 
+# make a terminal-clickable link to the file and linenum.
+# Similar to `define_default_editors` in `Base.Filesystem` but for creating URIs not commands
+function editor_link(path::String, linenum::Int)
+    # Note: the editor path can include spaces (if escaped) and flags.
+    editor = nothing
+    for var in ["JULIA_EDITOR", "VISUAL", "EDITOR"]
+        str = get(ENV, var, nothing)
+        str isa String || continue
+        editor = str
+        break
+    end
+    path_encoded = Base.Filesystem.encode_uri_component(path)
+    if editor !== nothing
+        if editor == "code"
+            return "vscode://file/$path_encoded:$linenum"
+        elseif editor == "subl" || editor == "sublime_text"
+            return "subl://open?url=file://$path_encoded&line=$linenum"
+        elseif editor == "idea" || occursin("idea", editor)
+            return "idea://open?file=$path_encoded&line=$linenum"
+        elseif editor == "pycharm"
+            return "pycharm://open?file=$path_encoded&line=$linenum"
+        elseif editor == "atom"
+            return "atom://core/open/file?filename=$path_encoded&line=$linenum"
+        elseif editor == "emacsclient" || editor == "emacs"
+            return "emacs://open?file=$path_encoded&line=$linenum"
+        elseif editor == "vim" || editor == "nvim"
+            # Note: Vim/Nvim may not support standard URI schemes without specific plugins
+            return "vim://open?file=$path_encoded&line=$linenum"
+        end
+    end
+    # fallback to generic URI, but line numbers are not supported by generic URI
+    return Base.Filesystem.uripath(path)
+end
+
 function print_flat(io::IO, lilist::Vector{StackFrame},
         n::Vector{Int}, m::Vector{Int},
-        cols::Int, filenamemap::Dict{Symbol,String},
+        cols::Int, filenamemap::FileNameMap,
         fmt::ProfileFormat)
     if fmt.sortedby === :count
         p = sortperm(n)
@@ -802,18 +854,18 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
     lilist = lilist[p]
     n = n[p]
     m = m[p]
-    filenames = String[short_path(li.file, filenamemap) for li in lilist]
+    pkgnames_filenames = Tuple{String,String,String}[short_path(li.file, filenamemap) for li in lilist]
     funcnames = String[string(li.func) for li in lilist]
     wcounts = max(6, ndigits(maximum(n)))
     wself = max(9, ndigits(maximum(m)))
     maxline = 1
     maxfile = 6
     maxfunc = 10
-    for i in 1:length(lilist)
+    for i in eachindex(lilist)
         li = lilist[i]
         maxline = max(maxline, li.line)
-        maxfunc = max(maxfunc, length(funcnames[i]))
-        maxfile = max(maxfile, length(filenames[i]))
+        maxfunc = max(maxfunc, textwidth(funcnames[i]))
+        maxfile = max(maxfile, sum(textwidth, pkgnames_filenames[i][2:3]) + 1)
     end
     wline = max(5, ndigits(maxline))
     ntext = max(20, cols - wcounts - wself - wline - 3)
@@ -829,7 +881,7 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
             rpad("File", wfile, " "), " ", lpad("Line", wline, " "), " Function")
     println(io, lpad("=====", wcounts, " "), " ", lpad("========", wself, " "), " ",
             rpad("====", wfile, " "), " ", lpad("====", wline, " "), " ========")
-    for i = 1:length(n)
+    for i in eachindex(n)
         n[i] < fmt.mincount && continue
         li = lilist[i]
         Base.print(io, lpad(string(n[i]), wcounts, " "), " ")
@@ -841,16 +893,29 @@ function print_flat(io::IO, lilist::Vector{StackFrame},
                 Base.print(io, "[any unknown stackframes]")
             end
         else
-            file = filenames[i]
+            path, pkgname, file = pkgnames_filenames[i]
             isempty(file) && (file = "[unknown file]")
-            Base.print(io, rpad(rtruncto(file, wfile), wfile, " "), " ")
+            pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname)
+            Base.printstyled(io, pkgname, color=pkgcolor)
+            file_trunc = ltruncate(file, max(1, wfile))
+            wpad = wfile - textwidth(pkgname)
+            if !isempty(pkgname) && !startswith(file_trunc, "/")
+                Base.print(io, "/")
+                wpad -= 1
+            end
+            if isempty(path)
+                Base.print(io, rpad(file_trunc, wpad, " "))
+            else
+                link = editor_link(path, li.line)
+                Base.print(io, rpad(styled"{link=$link:$file_trunc}", wpad, " "))
+            end
             Base.print(io, lpad(li.line > 0 ? string(li.line) : "?", wline, " "), " ")
             fname = funcnames[i]
             if !li.from_c && li.linfo !== nothing
                 fname = sprint(show_spec_linfo, li)
             end
             isempty(fname) && (fname = "[unknown function]")
-            Base.print(io, ltruncto(fname, wfunc))
+            Base.print(io, rtruncate(fname, wfunc))
         end
         println(io)
     end
@@ -889,21 +954,24 @@ function indent(depth::Int)
     return indent
 end
 
-function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::Dict{Symbol,String}, showpointer::Bool)
+# mimics Stacktraces
+const PACKAGE_FIXEDCOLORS = Dict{String, Any}("@Base" => :gray, "@Core" => :gray)
+
+function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, maxes, filenamemap::FileNameMap, showpointer::Bool)
     nindent = min(cols>>1, level)
     ndigoverhead = ndigits(maxes.overhead)
     ndigcounts = ndigits(maxes.count)
     ndigline = ndigits(maximum(frame.frame.line for frame in frames)) + 6
     ntext = max(30, cols - ndigoverhead - nindent - ndigcounts - ndigline - 6)
     widthfile = 2*ntext÷5 # min 12
-    strs = Vector{String}(undef, length(frames))
+    strs = Vector{AnnotatedString{String}}(undef, length(frames))
     showextra = false
     if level > nindent
         nextra = level - nindent
         nindent -= ndigits(nextra) + 2
         showextra = true
     end
-    for i = 1:length(frames)
+    for i in eachindex(frames)
         frame = frames[i]
         li = frame.frame
         stroverhead = lpad(frame.overhead > 0 ? string(frame.overhead) : "", ndigoverhead, " ")
@@ -924,7 +992,7 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
                 else
                     fname = string(li.func)
                 end
-                filename = short_path(li.file, filenamemap)
+                path, pkgname, filename = short_path(li.file, filenamemap)
                 if showpointer
                     fname = string(
                         "0x",
@@ -932,17 +1000,26 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
                         " ",
                         fname)
                 end
-                strs[i] = string(stroverhead, "╎", base, strcount, " ",
-                    rtruncto(filename, widthfile),
-                    ":",
-                    li.line == -1 ? "?" : string(li.line),
-                    "; ",
-                    fname)
+                pkgcolor = get!(() -> popfirst!(Base.STACKTRACE_MODULECOLORS), PACKAGE_FIXEDCOLORS, pkgname)
+                remaining_path = ltruncate(filename, max(1, widthfile - textwidth(pkgname) - 1))
+                linenum = li.line == -1 ? "?" : string(li.line)
+                slash = (!isempty(pkgname) && !startswith(remaining_path, "/")) ? "/" : ""
+                styled_path = styled"{$pkgcolor:$pkgname}$slash$remaining_path:$linenum"
+                rich_file = if isempty(path)
+                    styled_path
+                else
+                    link = editor_link(path, li.line)
+                    styled"{link=$link:$styled_path}"
+                end
+                strs[i] = Base.annotatedstring(stroverhead, "╎", base, strcount, " ", rich_file, "  ", fname)
+                if frame.overhead > 0
+                    strs[i] = styled"{bold:$(strs[i])}"
+                end
             end
         else
             strs[i] = string(stroverhead, "╎", base, strcount, " [unknown stackframe]")
         end
-        strs[i] = ltruncto(strs[i], cols)
+        strs[i] = rtruncate(strs[i], cols)
     end
     return strs
 end
@@ -1101,10 +1178,10 @@ end
 # avoid stack overflows.
 function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
     maxes = maxstats(bt)
-    filenamemap = Dict{Symbol,String}()
-    worklist = [(bt, 0, 0, "")]
+    filenamemap = FileNameMap()
+    worklist = [(bt, 0, 0, AnnotatedString(""))]
     if !is_subsection
-        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line; Function\n")
+        Base.print(io, "Overhead ╎ [+additional indent] Count File:Line  Function\n")
         Base.print(io, "=========================================================\n")
     end
     while !isempty(worklist)
@@ -1135,7 +1212,7 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat
             count = down.count
             count < fmt.mincount && continue
             count < noisefloor && continue
-            str = strs[i]
+            str = strs[i]::AnnotatedString
             noisefloor_down = fmt.noisefloor > 0 ? floor(Int, fmt.noisefloor * sqrt(count)) : 0
             pushfirst!(worklist, (down, level + 1, noisefloor_down, str))
         end
@@ -1196,24 +1273,7 @@ function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
     return [(v[i], k[i]) for i in p]
 end
 
-# Utilities
-function rtruncto(str::String, w::Int)
-    if textwidth(str) <= w
-        return str
-    else
-        return string("…", str[prevind(str, end, w-2):end])
-    end
-end
-function ltruncto(str::String, w::Int)
-    if textwidth(str) <= w
-        return str
-    else
-        return string(str[1:nextind(str, 1, w-2)], "…")
-    end
-end
-
-
-truncto(str::Symbol, w::Int) = truncto(string(str), w)
+## Utilities
 
 # Order alphabetically (file, function) and then by line number
 function liperm(lilist::Vector{StackFrame})
@@ -1250,8 +1310,10 @@ end
 
 
 """
-    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false)
-    Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false)
+    Profile.take_heap_snapshot(filepath::String, all_one::Bool=false;
+                               redact_data::Bool=true, streaming::Bool=false)
+    Profile.take_heap_snapshot(all_one::Bool=false; redact_data:Bool=true,
+                               dir::String=nothing, streaming::Bool=false)
 
 Write a snapshot of the heap, in the JSON format expected by the Chrome
 Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file
@@ -1262,6 +1324,8 @@ full file path, or IO stream.
 If `all_one` is true, then report the size of every object as one so they can be easily
 counted. Otherwise, report the actual size.
 
+If `redact_data` is true (default), then do not emit the contents of any object.
+
 If `streaming` is true, we will stream the snapshot data out into four files, using filepath
 as the prefix, to avoid having to hold the entire snapshot in memory. This option should be
 used for any setting where your memory is constrained. These files can then be reassembled
@@ -1277,28 +1341,28 @@ backwards-compatibility) and your process is killed, note that this will always
 parts in the same directory as your provided filepath, so you can still reconstruct the
 snapshot after the fact, via `assemble_snapshot()`.
 """
-function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false)
+function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; redact_data::Bool=true, streaming::Bool=false)
     if streaming
-        _stream_heap_snapshot(filepath, all_one)
+        _stream_heap_snapshot(filepath, all_one, redact_data)
     else
         # Support the legacy, non-streaming mode, by first streaming the parts, then
         # reassembling it after we're done.
         prefix = filepath
-        _stream_heap_snapshot(prefix, all_one)
+        _stream_heap_snapshot(prefix, all_one, redact_data)
         Profile.HeapSnapshot.assemble_snapshot(prefix, filepath)
         Profile.HeapSnapshot.cleanup_streamed_files(prefix)
     end
     return filepath
 end
-function take_heap_snapshot(io::IO, all_one::Bool=false)
+function take_heap_snapshot(io::IO, all_one::Bool=false; redact_data::Bool=true)
     # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir,
     # then reassembling it after we're done.
     dir = tempdir()
     prefix = joinpath(dir, "snapshot")
-    _stream_heap_snapshot(prefix, all_one)
+    _stream_heap_snapshot(prefix, all_one, redact_data)
     Profile.HeapSnapshot.assemble_snapshot(prefix, io)
 end
-function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
+function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool, redact_data::Bool)
     # Nodes and edges are binary files
     open("$prefix.nodes", "w") do nodes
         open("$prefix.edges", "w") do edges
@@ -1311,9 +1375,9 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
                     Base.@_lock_ios(json,
                         ccall(:jl_gc_take_heap_snapshot,
                             Cvoid,
-                            (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar),
+                            (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar, Cchar),
                             nodes.handle, edges.handle, strings.handle, json.handle,
-                            Cchar(all_one))
+                            Cchar(all_one), Cchar(redact_data))
                     )
                     )
                     )
@@ -1323,7 +1387,7 @@ function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool)
         end
     end
 end
-function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString}
+function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing, kwargs...) where {S <: AbstractString}
     fname = "$(getpid())_$(time_ns()).heapsnapshot"
     if isnothing(dir)
         wd = pwd()
@@ -1338,7 +1402,7 @@ function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing)
     else
         fpath = joinpath(expanduser(dir), fname)
     end
-    return take_heap_snapshot(fpath, all_one)
+    return take_heap_snapshot(fpath, all_one; kwargs...)
 end
 
 """
diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl
index 50da13e550d82..b2d86ee1f27b6 100644
--- a/stdlib/Profile/src/heapsnapshot_reassemble.jl
+++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl
@@ -99,40 +99,42 @@ function assemble_snapshot(in_prefix, io::IO)
 
     orphans = Set{UInt}() # nodes that have no incoming edges
     # Parse nodes with empty edge counts that we need to fill later
-    nodes_file = open(string(in_prefix, ".nodes"), "r")
-    for i in 1:length(nodes)
-        node_type = read(nodes_file, Int8)
-        node_name_idx = read(nodes_file, UInt)
-        id = read(nodes_file, UInt)
-        self_size = read(nodes_file, Int)
-        @assert read(nodes_file, Int) == 0 # trace_node_id
-        @assert read(nodes_file, Int8) == 0 # detachedness
-
-        nodes.type[i] = node_type
-        nodes.name_idx[i] = node_name_idx
-        nodes.id[i] = id
-        nodes.self_size[i] = self_size
-        nodes.edge_count[i] = 0 # edge_count
-        # populate the orphans set with node index
-        push!(orphans, i-1)
+    open(string(in_prefix, ".nodes"), "r") do nodes_file
+        for i in 1:length(nodes)
+            node_type = read(nodes_file, Int8)
+            node_name_idx = read(nodes_file, UInt)
+            id = read(nodes_file, UInt)
+            self_size = read(nodes_file, Int)
+            @assert read(nodes_file, Int) == 0 # trace_node_id
+            @assert read(nodes_file, Int8) == 0 # detachedness
+
+            nodes.type[i] = node_type
+            nodes.name_idx[i] = node_name_idx
+            nodes.id[i] = id
+            nodes.self_size[i] = self_size
+            nodes.edge_count[i] = 0 # edge_count
+            # populate the orphans set with node index
+            push!(orphans, i-1)
+        end
     end
 
     # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets
-    edges_file = open(string(in_prefix, ".edges"), "r")
-    for i in 1:length(nodes.edges)
-        edge_type = read(edges_file, Int8)
-        edge_name_or_index = read(edges_file, UInt)
-        from_node = read(edges_file, UInt)
-        to_node = read(edges_file, UInt)
-
-        nodes.edges.type[i] = edge_type
-        nodes.edges.name_or_index[i] = edge_name_or_index
-        nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
-        nodes.edge_count[from_node + 1] += UInt32(1)  # C and JSON use 0-based indexing
-        push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
-        # remove the node from the orphans if it has at least one incoming edge
-        if to_node in orphans
-            delete!(orphans, to_node)
+    open(string(in_prefix, ".edges"), "r") do edges_file
+        for i in 1:length(nodes.edges)
+            edge_type = read(edges_file, Int8)
+            edge_name_or_index = read(edges_file, UInt)
+            from_node = read(edges_file, UInt)
+            to_node = read(edges_file, UInt)
+
+            nodes.edges.type[i] = edge_type
+            nodes.edges.name_or_index[i] = edge_name_or_index
+            nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
+            nodes.edge_count[from_node + 1] += UInt32(1)  # C and JSON use 0-based indexing
+            push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
+            # remove the node from the orphans if it has at least one incoming edge
+            if to_node in orphans
+                delete!(orphans, to_node)
+            end
         end
     end
 
@@ -153,7 +155,8 @@ function assemble_snapshot(in_prefix, io::IO)
         _write_decimal_number(io, nodes.edge_count[i], _digits_buf)
         print(io, ",0,0")
     end
-    print(io, "],\"edges\":[")
+    print(io, "],\n")
+    print(io, "\"edges\":[")
     e = 1
     for n in 1:length(nodes)
         count = nodes.edge_count[n]
@@ -175,6 +178,13 @@ function assemble_snapshot(in_prefix, io::IO)
     end
     println(io, "],")
 
+    # not used. Required by microsoft/vscode-v8-heap-tools
+    # This order of these fields is required by chrome dev tools otherwise loading fails
+    println(io, "\"trace_function_infos\":[],")
+    println(io, "\"trace_tree\":[],")
+    println(io, "\"samples\":[],")
+    println(io, "\"locations\":[],")
+
     println(io, "\"strings\":[")
     open(string(in_prefix, ".strings"), "r") do strings_io
         first = true
diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
index cbfdde61d7054..1769cbd12da3e 100644
--- a/stdlib/Profile/test/runtests.jl
+++ b/stdlib/Profile/test/runtests.jl
@@ -168,12 +168,15 @@ let cmd = Base.julia_cmd()
         println("done")
         print(Profile.len_data())
         """
-    p = open(`$cmd -e $script`)
+    # use multiple threads here to ensure that profiling works with threading
+    p = open(`$cmd -t2 -e $script`)
     t = Timer(120) do t
         # should be under 10 seconds, so give it 2 minutes then report failure
         println("KILLING debuginfo registration test BY PROFILE TEST WATCHDOG\n")
-        kill(p, Base.SIGTERM)
-        sleep(10)
+        kill(p, Base.SIGQUIT)
+        sleep(30)
+        kill(p, Base.SIGQUIT)
+        sleep(30)
         kill(p, Base.SIGKILL)
     end
     s = read(p, String)
@@ -202,8 +205,10 @@ if Sys.isbsd() || Sys.islinux()
             t = Timer(120) do t
                 # should be under 10 seconds, so give it 2 minutes then report failure
                 println("KILLING siginfo/sigusr1 test BY PROFILE TEST WATCHDOG\n")
-                kill(p, Base.SIGTERM)
-                sleep(10)
+                kill(p, Base.SIGQUIT)
+                sleep(30)
+                kill(p, Base.SIGQUIT)
+                sleep(30)
                 kill(p, Base.SIGKILL)
                 close(notify_exit)
             end
@@ -275,16 +280,31 @@ end
 
 @testset "HeapSnapshot" begin
     tmpdir = mktempdir()
+
+    # ensure that we can prevent redacting data
     fname = cd(tmpdir) do
-        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; print(Profile.take_heap_snapshot())"`, String)
+        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot(; redact_data=false))"`, String)
     end
 
     @test isfile(fname)
 
-    open(fname) do fs
-        @test readline(fs) != ""
+    sshot = read(fname, String)
+    @test sshot != ""
+    @test contains(sshot, "redact_this")
+
+    rm(fname)
+
+    # ensure that string data is redacted by default
+    fname = cd(tmpdir) do
+        read(`$(Base.julia_cmd()) --startup-file=no -e "using Profile; const x = \"redact_this\"; print(Profile.take_heap_snapshot())"`, String)
     end
 
+    @test isfile(fname)
+
+    sshot = read(fname, String)
+    @test sshot != ""
+    @test !contains(sshot, "redact_this")
+
     rm(fname)
     rm(tmpdir, force = true, recursive = true)
 end
diff --git a/stdlib/REPL/Project.toml b/stdlib/REPL/Project.toml
index e07bbf07a2a76..f60a6a4766093 100644
--- a/stdlib/REPL/Project.toml
+++ b/stdlib/REPL/Project.toml
@@ -4,6 +4,7 @@ version = "1.11.0"
 
 [deps]
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
+JuliaSyntaxHighlighting = "dc6e5ff7-fb65-4e79-a425-ec3bc9c03011"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
 StyledStrings = "f489334b-da3d-4c2e-b8f0-e476e12c162b"
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index 42a9666088ac1..6250fc84dc6b2 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -50,7 +50,7 @@ julia> ans
 
 In Julia mode, the REPL supports something called *prompt pasting*. This activates when pasting text
 that starts with `julia> ` into the REPL. In that case, only expressions starting with `julia> ` (as
-well as the other REPL mode prompts: `shell> `, `help?> `, `pkg>` ) are parsed, but others are
+well as the other REPL mode prompts: `shell> `, `help?> `, `pkg> ` ) are parsed, but others are
 removed. This makes it possible to paste a chunk of text that has been copied from a REPL session
 without having to scrub away prompts and outputs. This feature is enabled by default but can be
 disabled or enabled at will with `REPL.enable_promptpaste(::Bool)`. If it is enabled, you can try it
diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl
index 7b127a343a98c..c92dca8c8e015 100644
--- a/stdlib/REPL/src/LineEdit.jl
+++ b/stdlib/REPL/src/LineEdit.jl
@@ -3,7 +3,7 @@
 module LineEdit
 
 import ..REPL
-using REPL: AbstractREPL, Options
+using ..REPL: AbstractREPL, Options
 
 using ..Terminals
 import ..Terminals: raw!, width, height, clear_line, beep
@@ -66,6 +66,7 @@ show(io::IO, x::Prompt) = show(io, string("Prompt(\"", prompt_string(x.prompt),
 mutable struct MIState
     interface::ModalInterface
     active_module::Module
+    previous_active_module::Module
     current_mode::TextInterface
     aborted::Bool
     mode_state::IdDict{TextInterface,ModeState}
@@ -78,7 +79,7 @@ mutable struct MIState
     async_channel::Channel{Function}
 end
 
-MIState(i, mod, c, a, m) = MIState(i, mod, c, a, m, String[], 0, Char[], 0, :none, :none, Channel{Function}())
+MIState(i, mod, c, a, m) = MIState(i, mod, mod, c, a, m, String[], 0, Char[], 0, :none, :none, Channel{Function}())
 
 const BufferLike = Union{MIState,ModeState,IOBuffer}
 const State = Union{MIState,ModeState}
@@ -381,7 +382,13 @@ function check_for_hint(s::MIState)
         # Requires making space for them earlier in refresh_multi_line
         return clear_hint(st)
     end
-    completions, partial, should_complete = complete_line(st.p.complete, st, s.active_module; hint = true)::Tuple{Vector{String},String,Bool}
+
+    completions, partial, should_complete = try
+        complete_line(st.p.complete, st, s.active_module; hint = true)::Tuple{Vector{String},String,Bool}
+    catch
+        @debug "error completing line for hint" exception=current_exceptions()
+        return clear_hint(st)
+    end
     isempty(completions) && return clear_hint(st)
     # Don't complete for single chars, given e.g. `x` completes to `xor`
     if length(partial) > 1 && should_complete
@@ -741,7 +748,26 @@ function edit_move_right(buf::IOBuffer)
     end
     return false
 end
-edit_move_right(s::PromptState) = edit_move_right(s.input_buffer) ? refresh_line(s) : false
+function edit_move_right(m::MIState)
+    s = state(m)
+    buf = s.input_buffer
+    if edit_move_right(s.input_buffer)
+        refresh_line(s)
+        return true
+    else
+        completions, partial, should_complete = complete_line(s.p.complete, s, m.active_module)
+        if should_complete && eof(buf) && length(completions) == 1 && length(partial) > 1
+            # Replace word by completion
+            prev_pos = position(s)
+            push_undo(s)
+            edit_splice!(s, (prev_pos - sizeof(partial)) => prev_pos, completions[1])
+            refresh_line(state(s))
+            return true
+        else
+            return false
+        end
+    end
+end
 
 function edit_move_word_right(s::PromptState)
     if !eof(s.input_buffer)
@@ -1491,13 +1517,11 @@ end
 
 current_word_with_dots(s::MIState) = current_word_with_dots(buffer(s))
 
-previous_active_module::Module = Main
-
 function activate_module(s::MIState)
     word = current_word_with_dots(s);
     empty = isempty(word)
     mod = if empty
-        previous_active_module
+        s.previous_active_module
     else
         try
             Base.Core.eval(Base.active_module(), Base.Meta.parse(word))
@@ -1513,7 +1537,7 @@ function activate_module(s::MIState)
     if Base.active_module() == Main || mod == Main
         # At least one needs to be Main. Disallows toggling between two non-Main modules because it's
         # otherwise hard to get back to Main
-        global previous_active_module = Base.active_module()
+        s.previous_active_module = Base.active_module()
     end
     REPL.activate(mod)
     edit_clear(s)
diff --git a/stdlib/REPL/src/Pkg_beforeload.jl b/stdlib/REPL/src/Pkg_beforeload.jl
index 56c4e2562f7e6..472fbc924668d 100644
--- a/stdlib/REPL/src/Pkg_beforeload.jl
+++ b/stdlib/REPL/src/Pkg_beforeload.jl
@@ -71,7 +71,9 @@ end
 function projname(project_file::String)
     if isfile(project_file)
         name = try
-            p = Base.TOML.Parser()
+            # The `nothing` here means that this TOML parser does not return proper Dates.jl
+            # objects - but that's OK since we're just checking the name here.
+            p = Base.TOML.Parser{nothing}()
             Base.TOML.reinit!(p, read(project_file, String); filepath=project_file)
             proj = Base.TOML.parse(p)
             get(proj, "name", nothing)
@@ -86,7 +88,7 @@ function projname(project_file::String)
     end
     for depot in Base.DEPOT_PATH
         envdir = joinpath(depot, "environments")
-        if startswith(abspath(project_file), abspath(envdir))
+        if startswith(safe_realpath(project_file), safe_realpath(envdir))
             return "@" * name
         end
     end
@@ -125,5 +127,5 @@ function Pkg_promptf()
         end
     end
     # Note no handling of Pkg.offline, as the Pkg version does here
-    return "$(prefix)pkg> "
+    return "$(prefix)$(PKG_PROMPT)"
 end
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index 6eb22b2360f3c..272b907165341 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -33,35 +33,19 @@ function UndefVarError_hint(io::IO, ex::UndefVarError)
     if isdefined(ex, :scope)
         scope = ex.scope
         if scope isa Module
-            bnd = ccall(:jl_get_module_binding, Any, (Any, Any, Cint), scope, var, true)::Core.Binding
-            if isdefined(bnd, :owner)
-                owner = bnd.owner
-                if owner === bnd
-                    print(io, "\nSuggestion: add an appropriate import or assignment. This global was declared but not assigned.")
-                end
+            bpart = Base.lookup_binding_partition(Base.get_world_counter(), GlobalRef(scope, var))
+            kind = Base.binding_kind(bpart)
+            if kind === Base.BINDING_KIND_GLOBAL || kind === Base.BINDING_KIND_CONST || kind == Base.BINDING_KIND_DECLARED
+                print(io, "\nSuggestion: add an appropriate import or assignment. This global was declared but not assigned.")
+            elseif kind === Base.BINDING_KIND_FAILED
+                print(io, "\nHint: It looks like two or more modules export different ",
+                "bindings with this name, resulting in ambiguity. Try explicitly ",
+                "importing it from a particular module, or qualifying the name ",
+                "with the module it should come from.")
+            elseif kind === Base.BINDING_KIND_GUARD
+                print(io, "\nSuggestion: check for spelling errors or missing imports.")
             else
-                owner = ccall(:jl_binding_owner, Ptr{Cvoid}, (Any, Any), scope, var)
-                if C_NULL == owner
-                    # No global of this name exists in this module.
-                    # This is the common case, so do not print that information.
-                    # It could be the binding was exported by two modules, which we can detect
-                    # by the `usingfailed` flag in the binding:
-                    if isdefined(bnd, :flags) && Bool(bnd.flags >> 4 & 1) # magic location of the `usingfailed` flag
-                        print(io, "\nHint: It looks like two or more modules export different ",
-                              "bindings with this name, resulting in ambiguity. Try explicitly ",
-                              "importing it from a particular module, or qualifying the name ",
-                              "with the module it should come from.")
-                    else
-                        print(io, "\nSuggestion: check for spelling errors or missing imports.")
-                    end
-                    owner = bnd
-                else
-                    owner = unsafe_pointer_to_objref(owner)::Core.Binding
-                end
-            end
-            if owner !== bnd
-                # this could use jl_binding_dbgmodule for the exported location in the message too
-                print(io, "\nSuggestion: this global was defined as `$(owner.globalref)` but not assigned a value.")
+                print(io, "\nSuggestion: this global was defined as `$(bpart.restriction.globalref)` but not assigned a value.")
             end
         elseif scope === :static_parameter
             print(io, "\nSuggestion: run Test.detect_unbound_args to detect method arguments that do not fully constrain a type parameter.")
@@ -90,7 +74,17 @@ end
 function _UndefVarError_warnfor(io::IO, m::Module, var::Symbol)
     Base.isbindingresolved(m, var) || return false
     (Base.isexported(m, var) || Base.ispublic(m, var)) || return false
-    print(io, "\nHint: a global variable of this name also exists in $m.")
+    active_mod = Base.active_module()
+    print(io, "\nHint: ")
+    if isdefined(active_mod, Symbol(m))
+        print(io, "a global variable of this name also exists in $m.")
+    else
+        if Symbol(m) == var
+            print(io, "$m is loaded but not imported in the active module $active_mod.")
+        else
+            print(io, "a global variable of this name may be made accessible by importing $m in the current active module $active_mod")
+        end
+    end
     return true
 end
 
@@ -101,6 +95,7 @@ function __init__()
 end
 
 using Base.Meta, Sockets, StyledStrings
+using JuliaSyntaxHighlighting
 import InteractiveUtils
 
 export
@@ -109,6 +104,8 @@ export
     LineEditREPL,
     StreamREPL
 
+public TerminalMenus
+
 import Base:
     AbstractDisplay,
     display,
@@ -127,7 +124,7 @@ include("options.jl")
 
 include("LineEdit.jl")
 using .LineEdit
-import ..LineEdit:
+import .LineEdit:
     CompletionProvider,
     HistoryProvider,
     add_history,
@@ -326,7 +323,7 @@ function warn_on_non_owning_accesses(current_mod, ast)
     end
     return ast
 end
-warn_on_non_owning_accesses(ast) = warn_on_non_owning_accesses(REPL.active_module(), ast)
+warn_on_non_owning_accesses(ast) = warn_on_non_owning_accesses(Base.active_module(), ast)
 
 const repl_ast_transforms = Any[softscope, warn_on_non_owning_accesses] # defaults for new REPL backends
 
@@ -494,7 +491,7 @@ end
 function display(d::REPLDisplay, mime::MIME"text/plain", x)
     x = Ref{Any}(x)
     with_repl_linfo(d.repl) do io
-        io = IOContext(io, :limit => true, :module => active_module(d)::Module)
+        io = IOContext(io, :limit => true, :module => Base.active_module(d)::Module)
         if d.repl isa LineEditREPL
             mistate = d.repl.mistate
             mode = LineEdit.mode(mistate)
@@ -507,17 +504,24 @@ function display(d::REPLDisplay, mime::MIME"text/plain", x)
             # this can override the :limit property set initially
             io = foldl(IOContext, d.repl.options.iocontext, init=io)
         end
-        show(io, mime, x[])
+        show_repl(io, mime, x[])
         println(io)
     end
     return nothing
 end
+
 display(d::REPLDisplay, x) = display(d, MIME("text/plain"), x)
 
+show_repl(io::IO, mime::MIME"text/plain", x) = show(io, mime, x)
+
+show_repl(io::IO, ::MIME"text/plain", ex::Expr) =
+    print(io, JuliaSyntaxHighlighting.highlight(
+        sprint(show, ex, context=IOContext(io, :color => false))))
+
 function print_response(repl::AbstractREPL, response, show_value::Bool, have_color::Bool)
     repl.waserror = response[2]
     with_repl_linfo(repl) do io
-        io = IOContext(io, :module => active_module(repl)::Module)
+        io = IOContext(io, :module => Base.active_module(repl)::Module)
         print_response(io, response, show_value, have_color, specialdisplay(repl))
     end
     return nothing
@@ -618,7 +622,7 @@ function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); ba
             Core.println(Core.stderr, e)
             Core.println(Core.stderr, catch_backtrace())
         end
-    get_module = () -> active_module(repl)
+    get_module = () -> Base.active_module(repl)
     if backend_on_current_task
         t = @async run_frontend(repl, backend_ref)
         errormonitor(t)
@@ -750,13 +754,9 @@ REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers())
 mutable struct ShellCompletionProvider <: CompletionProvider end
 struct LatexCompletions <: CompletionProvider end
 
-function active_module() # this method is also called from Base
-    isdefined(Base, :active_repl) || return Main
-    return active_module(Base.active_repl::AbstractREPL)
-end
-active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module
-active_module(::AbstractREPL) = Main
-active_module(d::REPLDisplay) = active_module(d.repl)
+Base.active_module((; mistate)::LineEditREPL) = mistate === nothing ? Main : mistate.active_module
+Base.active_module(::AbstractREPL) = Main
+Base.active_module(d::REPLDisplay) = Base.active_module(d.repl)
 
 setmodifiers!(c::CompletionProvider, m::LineEdit.Modifiers) = nothing
 
@@ -768,11 +768,11 @@ setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers =
 Set `mod` as the default contextual module in the REPL,
 both for evaluating expressions and printing them.
 """
-function activate(mod::Module=Main)
+function activate(mod::Module=Main; interactive_utils::Bool=true)
     mistate = (Base.active_repl::LineEditREPL).mistate
     mistate === nothing && return nothing
     mistate.active_module = mod
-    Base.load_InteractiveUtils(mod)
+    interactive_utils && Base.load_InteractiveUtils(mod)
     return nothing
 end
 
@@ -1195,7 +1195,7 @@ enable_promptpaste(v::Bool) = JL_PROMPT_PASTE[] = v
 
 function contextual_prompt(repl::LineEditREPL, prompt::Union{String,Function})
     function ()
-        mod = active_module(repl)
+        mod = Base.active_module(repl)
         prefix = mod == Main ? "" : string('(', mod, ") ")
         pr = prompt isa String ? prompt : prompt()
         prefix * pr
@@ -1257,7 +1257,7 @@ function setup_interface(
         on_enter = return_callback)
 
     # Setup help mode
-    help_mode = Prompt(contextual_prompt(repl, "help?> "),
+    help_mode = Prompt(contextual_prompt(repl, HELP_PROMPT),
         prompt_prefix = hascolor ? repl.help_color : "",
         prompt_suffix = hascolor ?
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
@@ -1354,8 +1354,8 @@ function setup_interface(
 
     shell_prompt_len = length(SHELL_PROMPT)
     help_prompt_len = length(HELP_PROMPT)
-    jl_prompt_regex = r"^In \[[0-9]+\]: |^(?:\(.+\) )?julia> "
-    pkg_prompt_regex = r"^(?:\(.+\) )?pkg> "
+    jl_prompt_regex = Regex("^In \\[[0-9]+\\]: |^(?:\\(.+\\) )?$JULIA_PROMPT")
+    pkg_prompt_regex = Regex("^(?:\\(.+\\) )?$PKG_PROMPT")
 
     # Canonicalize user keymap input
     if isa(extra_repl_keymap, Dict)
@@ -1769,7 +1769,7 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
         if have_color
             print(repl.stream,repl.prompt_color)
         end
-        print(repl.stream, "julia> ")
+        print(repl.stream, JULIA_PROMPT)
         if have_color
             print(repl.stream, input_color(repl))
         end
@@ -1793,7 +1793,7 @@ module Numbered
 
 using ..REPL
 
-__current_ast_transforms() = isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
+__current_ast_transforms() = Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
 
 function repl_eval_counter(hp)
     return length(hp.history) - hp.start_idx
@@ -1855,13 +1855,13 @@ end
 
 function __current_ast_transforms(backend)
     if backend === nothing
-        isdefined(Base, :active_repl_backend) ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
+        Base.active_repl_backend !== nothing ? Base.active_repl_backend.ast_transforms : REPL.repl_ast_transforms
     else
         backend.ast_transforms
     end
 end
 
-function numbered_prompt!(repl::LineEditREPL=Base.active_repl, backend=nothing)
+function numbered_prompt!(repl::LineEditREPL=Base.active_repl::LineEditREPL, backend=nothing)
     n = Ref{Int}(0)
     set_prompt(repl, n)
     set_output_prefix(repl, n)
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index db8045cde906a..77f7fdf15cc9c 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -295,7 +295,10 @@ function maybe_spawn_cache_PATH()
     @lock PATH_cache_lock begin
         PATH_cache_task isa Task && !istaskdone(PATH_cache_task) && return
         time() < next_cache_update && return
-        PATH_cache_task = Threads.@spawn REPLCompletions.cache_PATH()
+        PATH_cache_task = Threads.@spawn begin
+            REPLCompletions.cache_PATH()
+            @lock PATH_cache_lock PATH_cache_task = nothing # release memory when done
+        end
         Base.errormonitor(PATH_cache_task)
     end
 end
@@ -553,8 +556,7 @@ struct REPLInterpreter <: CC.AbstractInterpreter
     function REPLInterpreter(limit_aggressive_inference::Bool=false;
                              world::UInt = Base.get_world_counter(),
                              inf_params::CC.InferenceParams = CC.InferenceParams(;
-                                 aggressive_constant_propagation=true,
-                                 unoptimize_throw_blocks=false),
+                                 aggressive_constant_propagation=true),
                              opt_params::CC.OptimizationParams = CC.OptimizationParams(),
                              inf_cache::Vector{CC.InferenceResult} = CC.InferenceResult[])
         return new(limit_aggressive_inference, world, inf_params, opt_params, inf_cache)
@@ -601,7 +603,7 @@ is_repl_frame(sv::CC.InferenceState) = sv.linfo.def isa Module && sv.cache_mode
 
 function is_call_graph_uncached(sv::CC.InferenceState)
     CC.is_cached(sv) && return false
-    parent = sv.parent
+    parent = CC.frame_parent(sv)
     parent === nothing && return true
     return is_call_graph_uncached(parent::CC.InferenceState)
 end
@@ -624,7 +626,7 @@ function is_repl_frame_getproperty(sv::CC.InferenceState)
     def isa Method || return false
     def.name === :getproperty || return false
     CC.is_cached(sv) && return false
-    return is_repl_frame(sv.parent)
+    return is_repl_frame(CC.frame_parent(sv))
 end
 
 # aggressive global binding resolution for `getproperty(::Module, ::Symbol)` calls within `repl_frame`
@@ -897,8 +899,11 @@ const superscript_regex = Regex("^\\\\\\^[" * join(isdigit(k) || isletter(k) ? "
 
 # Aux function to detect whether we're right after a using or import keyword
 function get_import_mode(s::String)
+    # allow all of these to start with leading whitespace and macros like @eval and @eval(
+    # ^\s*(?:@\w+\s*(?:\(\s*)?)?
+
     # match simple cases like `using |` and `import  |`
-    mod_import_match_simple = match(r"^\b(using|import)\s*$", s)
+    mod_import_match_simple = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s*$", s)
     if mod_import_match_simple !== nothing
         if mod_import_match_simple[1] == "using"
             return :using_module
@@ -907,7 +912,7 @@ function get_import_mode(s::String)
         end
     end
     # match module import statements like `using Foo|`, `import Foo, Bar|` and `using Foo.Bar, Baz, |`
-    mod_import_match = match(r"^\b(using|import)\s+([\w\.]+(?:\s*,\s*[\w\.]+)*),?\s*$", s)
+    mod_import_match = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s+([\w\.]+(?:\s*,\s*[\w\.]+)*),?\s*$", s)
     if mod_import_match !== nothing
         if mod_import_match.captures[1] == "using"
             return :using_module
@@ -916,7 +921,7 @@ function get_import_mode(s::String)
         end
     end
     # now match explicit name import statements like `using Foo: |` and `import Foo: bar, baz|`
-    name_import_match = match(r"^\b(using|import)\s+([\w\.]+)\s*:\s*([\w@!\s,]+)$", s)
+    name_import_match = match(r"^\s*(?:@\w+\s*(?:\(\s*)?)?\b(using|import)\s+([\w\.]+)\s*:\s*([\w@!\s,]+)$", s)
     if name_import_match !== nothing
         if name_import_match[1] == "using"
             return :using_name
diff --git a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
index 9fcddef2fd484..f970cd9a289c2 100644
--- a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
+++ b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
@@ -1,8 +1,15 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+"""
+    REPL.TerminalMenus
+
+A module that contains code for displaying text mode interactive menus.
+Key exported symbols include [`REPL.TerminalMenus.RadioMenu`](@ref) and
+[`REPL.TerminalMenus.MultiSelectMenu`](@ref).
+"""
 module TerminalMenus
 
-using REPL: REPL
+using ..REPL: REPL
 
 function default_terminal(; in::IO=stdin, out::IO=stdout, err::IO=stderr)
     return REPL.Terminals.TTYTerminal(
@@ -23,6 +30,9 @@ export
     Pager,
     request
 
+public Config, config, MultiSelectConfig
+public pick, cancel, writeline, options, numoptions, selected, header, keypress
+
 # TODO: remove in Julia 2.0
 # While not exported, AbstractMenu documented these as an extension interface
 @deprecate printMenu printmenu
diff --git a/stdlib/REPL/src/Terminals.jl b/stdlib/REPL/src/Terminals.jl
index 821ed224f1829..0cf6888d248e8 100644
--- a/stdlib/REPL/src/Terminals.jl
+++ b/stdlib/REPL/src/Terminals.jl
@@ -97,6 +97,7 @@ abstract type UnixTerminal <: TextTerminal end
 pipe_reader(t::UnixTerminal) = t.in_stream::IO
 pipe_writer(t::UnixTerminal) = t.out_stream::IO
 
+@nospecialize
 mutable struct TerminalBuffer <: UnixTerminal
     out_stream::IO
 end
@@ -107,6 +108,7 @@ mutable struct TTYTerminal <: UnixTerminal
     out_stream::IO
     err_stream::IO
 end
+@specialize
 
 const CSI = "\x1b["
 
@@ -118,10 +120,8 @@ cmove_line_up(t::UnixTerminal, n) = (cmove_up(t, n); cmove_col(t, 1))
 cmove_line_down(t::UnixTerminal, n) = (cmove_down(t, n); cmove_col(t, 1))
 cmove_col(t::UnixTerminal, n) = (write(t.out_stream, '\r'); n > 1 && cmove_right(t, n-1))
 
-const is_precompiling = Ref(false)
 if Sys.iswindows()
     function raw!(t::TTYTerminal,raw::Bool)
-        is_precompiling[] && return true
         check_open(t.in_stream)
         if Base.ispty(t.in_stream)
             run((raw ? `stty raw -echo onlcr -ocrnl opost` : `stty sane`),
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index feeeecbd97165..3c5e102bb657e 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -13,8 +13,6 @@ using Base: with_output_color, mapany, isdeprecated, isexported
 
 using Base.Filesystem: _readdirx
 
-import REPL
-
 using InteractiveUtils: subtypes
 
 using Unicode: normalize
@@ -25,7 +23,7 @@ using Unicode: normalize
 function helpmode(io::IO, line::AbstractString, mod::Module=Main)
     internal_accesses = Set{Pair{Module,Symbol}}()
     quote
-        docs = $REPL.insert_hlines($(REPL._helpmode(io, line, mod, internal_accesses)))
+        docs = $Markdown.insert_hlines($(REPL._helpmode(io, line, mod, internal_accesses)))
         $REPL.insert_internal_warning(docs, $internal_accesses)
     end
 end
@@ -78,26 +76,13 @@ function _helpmode(io::IO, line::AbstractString, mod::Module=Main, internal_acce
 end
 _helpmode(line::AbstractString, mod::Module=Main) = _helpmode(stdout, line, mod)
 
-# Print horizontal lines between each docstring if there are multiple docs
-function insert_hlines(docs)
-    if !isa(docs, Markdown.MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
-        return docs
-    end
-    docs = docs::Markdown.MD
-    v = Any[]
-    for (n, doc) in enumerate(docs.content)
-        push!(v, doc)
-        n == length(docs.content) || push!(v, Markdown.HorizontalRule())
-    end
-    return Markdown.MD(v)
-end
-
 function formatdoc(d::DocStr)
     buffer = IOBuffer()
     for part in d.text
         formatdoc(buffer, d, part)
     end
-    Markdown.MD(Any[Markdown.parse(seekstart(buffer))])
+    md = Markdown.MD(Any[Markdown.parse(seekstart(buffer))])
+    assume_julia_code!(md)
 end
 @noinline formatdoc(buffer, d, part) = print(buffer, part)
 
@@ -111,6 +96,27 @@ function parsedoc(d::DocStr)
     d.object
 end
 
+"""
+    assume_julia_code!(doc::Markdown.MD) -> doc
+
+Assume that code blocks with no language specified are Julia code.
+"""
+function assume_julia_code!(doc::Markdown.MD)
+    assume_julia_code!(doc.content)
+    doc
+end
+
+function assume_julia_code!(blocks::Vector)
+    for (i, block) in enumerate(blocks)
+        if block isa Markdown.Code && block.language == ""
+            blocks[i] = Markdown.Code("julia", block.code)
+        elseif block isa Vector || block isa Markdown.MD
+            assume_julia_code!(block)
+        end
+    end
+    blocks
+end
+
 ## Trimming long help ("# Extended help")
 
 struct Message  # For direct messages to the terminal
@@ -475,7 +481,7 @@ repl_corrections(s) = repl_corrections(stdout, s)
 # inverse of latex_symbols Dict, lazily created as needed
 const symbols_latex = Dict{String,String}()
 function symbol_latex(s::String)
-    if isempty(symbols_latex) && isassigned(Base.REPL_MODULE_REF)
+    if isempty(symbols_latex)
         for (k,v) in Iterators.flatten((REPLCompletions.latex_symbols,
                                         REPLCompletions.emoji_symbols))
             symbols_latex[v] = k
diff --git a/stdlib/REPL/src/precompile.jl b/stdlib/REPL/src/precompile.jl
index b55f825e6a423..f7961a205e0b1 100644
--- a/stdlib/REPL/src/precompile.jl
+++ b/stdlib/REPL/src/precompile.jl
@@ -1,15 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 module Precompile
-# Can't use this during incremental: `@eval Module() begin``
 
 import ..REPL
-# Prepare this staging area with all the loaded packages available
-for (_pkgid, _mod) in Base.loaded_modules
-    if !(_pkgid.name in ("Main", "Core", "Base", "REPL"))
-        eval(:(const $(Symbol(_mod)) = $_mod))
-    end
-end
 
 # Ugly hack for our cache file to not have a dependency edge on the FakePTYs file.
 Base._track_dependencies[] = false
@@ -19,93 +12,99 @@ try
 finally
     Base._track_dependencies[] = true
 end
-using Base.Meta
-
-import Markdown
-import StyledStrings
-
-## Debugging options
-# Disable parallel precompiles generation by setting `false`
-const PARALLEL_PRECOMPILATION = true
-
-# View the code sent to the repl by setting this to `stdout`
-const debug_output = devnull # or stdout
-
-CTRL_C = '\x03'
-CTRL_D = '\x04'
-CTRL_R = '\x12'
-UP_ARROW = "\e[A"
-DOWN_ARROW = "\e[B"
-
-repl_script = """
-2+2
-print("")
-printstyled("a", "b")
-display([1])
-display([1 2; 3 4])
-foo(x) = 1
-@time @eval foo(1)
-; pwd
-$CTRL_C
-$CTRL_R$CTRL_C#
-? reinterpret
-using Ra\t$CTRL_C
-\\alpha\t$CTRL_C
-\e[200~paste here ;)\e[201~"$CTRL_C
-$UP_ARROW$DOWN_ARROW$CTRL_C
-123\b\b\b$CTRL_C
-\b\b$CTRL_C
-f(x) = x03
-f(1,2)
-[][1]
-Base.Iterators.minimum
-cd("complete_path\t\t$CTRL_C
-"""
-
-julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename())
-
-const JULIA_PROMPT = "julia> "
-const PKG_PROMPT = "pkg> "
-const SHELL_PROMPT = "shell> "
-const HELP_PROMPT = "help?> "
 
-blackhole = Sys.isunix() ? "/dev/null" : "nul"
-procenv = Dict{String,Any}(
-        "JULIA_HISTORY" => blackhole,
-        "JULIA_PROJECT" => nothing, # remove from environment
-        "JULIA_LOAD_PATH" => "@stdlib",
-        "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":",
-        "TERM" => "",
-        "JULIA_FALLBACK_REPL" => "0") # Turn REPL.jl on in subprocess
-
-generate_precompile_statements() = try
-    # Extract the precompile statements from the precompile file
-    statements_step = Channel{String}(Inf)
+function repl_workload()
+    # these are intentionally triggered
+    allowed_errors = [
+        "BoundsError: attempt to access 0-element Vector{Any} at index [1]",
+        "MethodError: no method matching f(::$Int, ::$Int)",
+        "Padding of type", # reinterpret docstring has ERROR examples
+    ]
+    function check_errors(out)
+        str = String(out)
+        if occursin("ERROR:", str) && !any(occursin(e, str) for e in allowed_errors)
+            @error "Unexpected error (Review REPL precompilation with debug_output on):\n$str"
+            exit(1)
+        end
+    end
+    ## Debugging options
+    # View the code sent to the repl by setting this to `stdout`
+    debug_output = devnull # or stdout
+
+    CTRL_C = '\x03'
+    CTRL_D = '\x04'
+    CTRL_R = '\x12'
+    UP_ARROW = "\e[A"
+    DOWN_ARROW = "\e[B"
+
+    # This is notified as soon as the first prompt appears
+    repl_init_event = Base.Event()
+
+    atreplinit() do repl
+        # Main is closed so we can't evaluate in it, but atreplinit runs at
+        # a time that repl.mistate === nothing so REPL.activate fails. So do
+        # it async and wait for the first prompt to know its ready.
+        t = @async begin
+            wait(repl_init_event)
+            REPL.activate(REPL.Precompile; interactive_utils=false)
+        end
+        Base.errormonitor(t)
+    end
 
-    step = @async mktemp() do precompile_file, precompile_file_h
-        # Collect statements from running a REPL process and replaying our REPL script
-        touch(precompile_file)
-        pts, ptm = open_fake_pty()
-        # we don't want existing REPL caches to be used so ignore them
-        setup_cmd = """
-        push!(Base.ignore_compiled_cache, Base.PkgId(Base.UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL"))
-        import REPL
-        REPL.Terminals.is_precompiling[] = true
-        """
-        p = run(
-                addenv(```$(julia_exepath()) -O0 --trace-compile=$precompile_file
-                    --cpu-target=native --startup-file=no --compiled-modules=existing
-                    --color=yes -i -e "$setup_cmd"```, procenv),
-                pts, pts, pts; wait=false
-            )
-        Base.close_stdio(pts)
-        # Prepare a background process to copy output from process until `pts` is closed
+    repl_script = """
+    2+2
+    print("")
+    printstyled("a", "b")
+    display([1])
+    display([1 2; 3 4])
+    foo(x) = 1
+    @time @eval foo(1)
+    ; pwd
+    $CTRL_C
+    $CTRL_R$CTRL_C#
+    ? reinterpret
+    using Ra\t$CTRL_C
+    \\alpha\t$CTRL_C
+    \e[200~paste here ;)\e[201~"$CTRL_C
+    $UP_ARROW$DOWN_ARROW$CTRL_C
+    123\b\b\b$CTRL_C
+    \b\b$CTRL_C
+    f(x) = x03
+    f(1,2)
+    [][1]
+    Base.Iterators.minimum
+    cd("complete_path\t\t$CTRL_C
+    println("done")
+    """
+
+    JULIA_PROMPT = "julia> "
+    PKG_PROMPT = "pkg> "
+    SHELL_PROMPT = "shell> "
+    HELP_PROMPT = "help?> "
+
+    blackhole = Sys.isunix() ? "/dev/null" : "nul"
+
+    withenv("JULIA_HISTORY" => blackhole,
+            "JULIA_PROJECT" => nothing, # remove from environment
+            "JULIA_LOAD_PATH" => "@stdlib",
+            "JULIA_DEPOT_PATH" => Sys.iswindows() ? ";" : ":",
+            "TERM" => "",
+            "JULIA_FALLBACK_REPL" => "0" # Make sure REPL.jl is turned on
+            ) do
+        rawpts, ptm = open_fake_pty()
+        pts = open(rawpts)::Base.TTY
+        if Sys.iswindows()
+            pts.ispty = false
+        else
+            # workaround libuv bug where it leaks pts
+            Base._fd(pts) == rawpts || Base.close_stdio(rawpts)
+        end
+        # Prepare a background process to copy output from `ptm` until `pts` is closed
         output_copy = Base.BufferStream()
         tee = @async try
             while !eof(ptm)
                 l = readavailable(ptm)
                 write(debug_output, l)
-                Sys.iswindows() && (sleep(0.1); yield(); yield()) # workaround hang - probably a libuv issue?
                 write(output_copy, l)
             end
             write(debug_output, "\n#### EOF ####\n")
@@ -118,11 +117,33 @@ generate_precompile_statements() = try
             close(ptm)
         end
         Base.errormonitor(tee)
-        repl_inputter = @async begin
+        orig_stdin = stdin
+        orig_stdout = stdout
+        orig_stderr = stderr
+        repltask = @task try
+            Base.run_std_repl(REPL, false, :yes, true)
+        finally
+            redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull)
+            redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull)
+            close(pts)
+        end
+        Base.errormonitor(repltask)
+        try
+            Base.REPL_MODULE_REF[] = REPL
+            redirect_stdin(pts)
+            redirect_stdout(pts)
+            redirect_stderr(pts)
+            try
+                REPL.print_qualified_access_warning(Base.Iterators, Base, :minimum) # trigger the warning while stderr is suppressed
+            finally
+                redirect_stderr(isopen(orig_stderr) ? orig_stderr : devnull)
+            end
+            schedule(repltask)
             # wait for the definitive prompt before start writing to the TTY
-            readuntil(output_copy, JULIA_PROMPT)
+            check_errors(readuntil(output_copy, JULIA_PROMPT))
+            write(debug_output, "\n#### REPL STARTED ####\n")
             sleep(0.1)
-            readavailable(output_copy)
+            check_errors(readavailable(output_copy))
             # Input our script
             precompile_lines = split(repl_script::String, '\n'; keepempty=false)
             curr = 0
@@ -130,16 +151,16 @@ generate_precompile_statements() = try
                 sleep(0.1)
                 curr += 1
                 # consume any other output
-                bytesavailable(output_copy) > 0 && readavailable(output_copy)
+                bytesavailable(output_copy) > 0 && check_errors(readavailable(output_copy))
                 # push our input
                 write(debug_output, "\n#### inputting statement: ####\n$(repr(l))\n####\n")
                 # If the line ends with a CTRL_C, don't write an extra newline, which would
                 # cause a second empty prompt. Our code below expects one new prompt per
                 # input line and can race out of sync with the unexpected second line.
                 endswith(l, CTRL_C) ? write(ptm, l) : write(ptm, l, "\n")
-                readuntil(output_copy, "\n")
+                check_errors(readuntil(output_copy, "\n"))
                 # wait for the next prompt-like to appear
-                readuntil(output_copy, "\n")
+                check_errors(readuntil(output_copy, "\n"))
                 strbuf = ""
                 while !eof(output_copy)
                     strbuf *= String(readavailable(output_copy))
@@ -149,80 +170,55 @@ generate_precompile_statements() = try
                     occursin(HELP_PROMPT, strbuf) && break
                     sleep(0.1)
                 end
+                notify(repl_init_event)
+                check_errors(strbuf)
             end
             write(debug_output, "\n#### COMPLETED - Closing REPL ####\n")
             write(ptm, "$CTRL_D")
-            wait(tee)
-            success(p) || Base.pipeline_error(p)
-            close(ptm)
-            write(debug_output, "\n#### FINISHED ####\n")
-        end
-        Base.errormonitor(repl_inputter)
-
-        n_step = 0
-        precompile_copy = Base.BufferStream()
-        buffer_reader = @async for statement in eachline(precompile_copy)
-            push!(statements_step, statement)
-            n_step += 1
+            wait(repltask)
+        finally
+            redirect_stdin(isopen(orig_stdin) ? orig_stdin : devnull)
+            redirect_stdout(isopen(orig_stdout) ? orig_stdout : devnull)
+            close(pts)
         end
+        wait(tee)
+    end
+    write(debug_output, "\n#### FINISHED ####\n")
+    nothing
+end
 
-        open(precompile_file, "r") do io
-            while true
-                # We need to always call eof(io) for bytesavailable(io) to work
-                eof(io) && istaskdone(repl_inputter) && eof(io) && break
-                if bytesavailable(io) == 0
-                    sleep(0.1)
-                    continue
-                end
-                write(precompile_copy, readavailable(io))
+# Copied from PrecompileTools.jl
+let
+    function check_edges(node)
+        parentmi = node.mi_info.mi
+        for child in node.children
+            childmi = child.mi_info.mi
+            if !(isdefined(childmi, :backedges) && parentmi ∈ childmi.backedges)
+                precompile(childmi.specTypes)
             end
+            check_edges(child)
         end
-        close(precompile_copy)
-        wait(buffer_reader)
-        return :ok
     end
-    !PARALLEL_PRECOMPILATION && wait(step)
-    bind(statements_step, step)
 
-    # Make statements unique
-    statements = Set{String}()
-    # Execute the precompile statements
-    for statement in statements_step
-        # Main should be completely clean
-        occursin("Main.", statement) && continue
-        Base.in!(statement, statements) && continue
+    if Base.generating_output() && Base.JLOptions().use_pkgimages != 0
+        Core.Compiler.Timings.reset_timings()
+        Core.Compiler.__set_measure_typeinf(true)
         try
-            ps = Meta.parse(statement)
-            if !isexpr(ps, :call)
-                # these are typically comments
-                @debug "skipping statement because it does not parse as an expression" statement
-                delete!(statements, statement)
-                continue
-            end
-            popfirst!(ps.args) # precompile(...)
-            ps.head = :tuple
-            # println(ps)
-            ps = eval(ps)
-            if !precompile(ps...)
-                @warn "Failed to precompile expression" form=statement _module=nothing _file=nothing _line=0
-            end
-        catch ex
-            # See #28808
-            @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0
+            repl_workload()
+        finally
+            Core.Compiler.__set_measure_typeinf(false)
+            Core.Compiler.Timings.close_current_timer()
+        end
+        roots = Core.Compiler.Timings._timings[1].children
+        for child in roots
+            precompile(child.mi_info.mi.specTypes)
+            check_edges(child)
         end
+        precompile(Tuple{typeof(Base.setindex!), Base.Dict{Any, Any}, Any, Int})
+        precompile(Tuple{typeof(Base.delete!), Base.Set{Any}, String})
+        precompile(Tuple{typeof(Base.:(==)), Char, String})
+        precompile(Tuple{typeof(Base.reseteof), Base.TTY})
     end
-
-    fetch(step) == :ok || throw("Collecting precompiles failed: $(c.excp)")
-    return nothing
-finally
-    GC.gc(true); GC.gc(false); # reduce memory footprint
 end
 
-generate_precompile_statements()
-
-precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol})
-precompile(Tuple{typeof(Base.take!), Base.Channel{Function}})
-precompile(Tuple{typeof(Base.put!), Base.Channel{Function}, Function})
-precompile(Tuple{typeof(Core.kwcall), NamedTuple{names, T} where T<:Tuple where names, typeof(REPL.LineEdit.complete_line), REPL.LineEdit.EmptyCompletionProvider, Any})
-
 end # Precompile
diff --git a/stdlib/REPL/test/docview.jl b/stdlib/REPL/test/docview.jl
index 123ff820bc939..6b374ed7f0149 100644
--- a/stdlib/REPL/test/docview.jl
+++ b/stdlib/REPL/test/docview.jl
@@ -28,13 +28,6 @@ end
     @test occursin("Couldn't find 'mutable s'", str)
 end
 
-@testset "Non-Markdown" begin
-    # https://github.com/JuliaLang/julia/issues/37765
-    @test isa(REPL.insert_hlines(Markdown.Text("foo")), Markdown.Text)
-    # https://github.com/JuliaLang/julia/issues/37757
-    @test REPL.insert_hlines(nothing) === nothing
-end
-
 @testset "Check @var_str also completes to var\"\" in REPL.doc_completions()" begin
     checks = ["var", "raw", "r"]
     symbols = "@" .* checks .* "_str"
diff --git a/stdlib/REPL/test/precompilation.jl b/stdlib/REPL/test/precompilation.jl
index 228cbd212a2c1..7efcf0b5e8282 100644
--- a/stdlib/REPL/test/precompilation.jl
+++ b/stdlib/REPL/test/precompilation.jl
@@ -15,8 +15,11 @@ if !Sys.iswindows()
     @testset "No interactive startup compilation" begin
         f, _ = mktemp()
 
-        # start an interactive session
-        cmd = `$(Base.julia_cmd()[1]) --trace-compile=$f -q --startup-file=no -i`
+        # start an interactive session, ensuring `TERM` is unset since it can trigger
+        # different amounts of precompilation stemming from `base/terminfo.jl` depending
+        # on the value, making the test here unreliable
+        cmd = addenv(`$(Base.julia_cmd()[1]) --trace-compile=$f -q --startup-file=no -i`,
+                     Dict("TERM" => ""))
         pts, ptm = open_fake_pty()
         p = run(cmd, pts, pts, pts; wait=false)
         Base.close_stdio(pts)
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index 6427440c9d39e..85a8137fa003e 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -9,7 +9,7 @@ using Markdown
 
 empty!(Base.Experimental._hint_handlers) # unregister error hints so they can be tested separately
 
-@test isassigned(Base.REPL_MODULE_REF)
+@test Base.REPL_MODULE_REF[] === REPL
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :FakePTYs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "FakePTYs.jl"))
@@ -244,8 +244,9 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
         @test occursin("shell> ", s) # check for the echo of the prompt
         @test occursin("'", s) # check for the echo of the input
         s = readuntil(stdout_read, "\n\n")
-        @test startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n  [1] ") ||
-              startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n  [1] ")
+        @test(startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n  [1] ") ||
+            startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n  [1] "),
+            skip = Sys.iswindows() && Sys.WORD_SIZE == 32)
         write(stdin_write, "\b")
         wait(t)
     end
@@ -1216,9 +1217,9 @@ global some_undef_global
 @test occursin("does not exist", sprint(show, help_result("..")))
 # test that helpmode is sensitive to contextual module
 @test occursin("No documentation found", sprint(show, help_result("Fix2", Main)))
-@test occursin("A type representing a partially-applied version", # exact string may change
+@test occursin("Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).", # exact string may change
                sprint(show, help_result("Base.Fix2", Main)))
-@test occursin("A type representing a partially-applied version", # exact string may change
+@test occursin("Alias for `Fix{2}`. See [`Fix`](@ref Base.Fix).", # exact string may change
                sprint(show, help_result("Fix2", Base)))
 
 
@@ -1650,12 +1651,12 @@ fake_repl() do stdin_write, stdout_read, repl
     write(stdin_write, "foobar\n")
     readline(stdout_read)
     @test readline(stdout_read) == "\e[0mERROR: UndefVarError: `foobar` not defined in `Main`"
-    @test readline(stdout_read) == ""
+    @test readline(stdout_read) == "" skip = Sys.iswindows() && Sys.WORD_SIZE == 32
     readuntil(stdout_read, "julia> ", keep=true)
     # check that top-level error did not change `err`
     write(stdin_write, "err\n")
     readline(stdout_read)
-    @test readline(stdout_read) == "\e[0m"
+    @test readline(stdout_read) == "\e[0m" skip = Sys.iswindows() && Sys.WORD_SIZE == 32
     readuntil(stdout_read, "julia> ", keep=true)
     # generate deeper error
     write(stdin_write, "foo() = foobar\n")
@@ -1965,11 +1966,20 @@ end
 
 @testset "Dummy Pkg prompt" begin
     # do this in an empty depot to test default for new users
-    withenv("JULIA_DEPOT_PATH" => mktempdir(), "JULIA_LOAD_PATH" => nothing) do
+    withenv("JULIA_DEPOT_PATH" => mktempdir() * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do
         prompt = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no -e "using REPL; print(REPL.Pkg_promptf())"`)
         @test prompt == "(@v$(VERSION.major).$(VERSION.minor)) pkg> "
     end
 
+    # Issue 55850
+    tmp_55850 = mktempdir()
+    tmp_sym_link = joinpath(tmp_55850, "sym")
+    symlink(tmp_55850, tmp_sym_link; dir_target=true)
+    withenv("JULIA_DEPOT_PATH" => tmp_sym_link * (Sys.iswindows() ? ";" : ":"), "JULIA_LOAD_PATH" => nothing) do
+        prompt = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no -e "using REPL; print(REPL.projname(REPL.find_project_file()))"`)
+        @test prompt == "@v$(VERSION.major).$(VERSION.minor)"
+    end
+
     get_prompt(proj::String) = readchomp(`$(Base.julia_cmd()[1]) --startup-file=no $(proj) -e "using REPL; print(REPL.Pkg_promptf())"`)
 
     @test get_prompt("--project=$(pkgdir(REPL))") == "(REPL) pkg> "
diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl
index 0b8989a2b88dc..3f8addcace73b 100644
--- a/stdlib/REPL/test/replcompletions.jl
+++ b/stdlib/REPL/test/replcompletions.jl
@@ -2238,6 +2238,26 @@ let s = "using .Iss"
     @test res
     @test "Issue52922" in c
 end
+let s = " using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = "@time using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = " @time using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
+let s = "@time(using .Iss"
+    c, r, res = test_complete_context(s)
+    @test res
+    @test "Issue52922" in c
+end
 let s = "using .Issue52922.Inn"
     c, r, res = test_complete_context(s)
     @test res
@@ -2391,3 +2411,8 @@ let (c, r, res) = test_complete_context("const xxx = Base.si", Main)
     @test res
     @test "sin" ∈ c
 end
+
+let (c, r, res) = test_complete_context("global xxx::Number = Base.", Main)
+    @test res
+    @test "pi" ∈ c
+end
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
index 9ce0896d0d125..26116d3bf4c81 100644
--- a/stdlib/Random/src/Random.jl
+++ b/stdlib/Random/src/Random.jl
@@ -29,6 +29,8 @@ export rand!, randn!,
        randcycle, randcycle!,
        AbstractRNG, MersenneTwister, RandomDevice, TaskLocalRNG, Xoshiro
 
+public seed!, default_rng, Sampler, SamplerType, SamplerTrivial, SamplerSimple
+
 ## general definitions
 
 """
diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl
index 5569d6d5c1da5..09a3e386e9a2b 100644
--- a/stdlib/Random/src/Xoshiro.jl
+++ b/stdlib/Random/src/Xoshiro.jl
@@ -185,8 +185,8 @@ end
     TaskLocalRNG
 
 The `TaskLocalRNG` has state that is local to its task, not its thread.
-It is seeded upon task creation, from the state of its parent task.
-Therefore, task creation is an event that changes the parent's RNG state.
+It is seeded upon task creation, from the state of its parent task, but without
+advancing the state of the parent's RNG.
 
 As an upside, the `TaskLocalRNG` is pretty fast, and permits reproducible
 multithreaded simulations (barring race conditions), independent of scheduler
@@ -203,6 +203,9 @@ may be any integer.
 
 !!! compat "Julia 1.11"
     Seeding `TaskLocalRNG()` with a negative integer seed requires at least Julia 1.11.
+
+!!! compat "Julia 1.10"
+    Task creation no longer advances the parent task's RNG state as of Julia 1.10.
 """
 struct TaskLocalRNG <: AbstractRNG end
 TaskLocalRNG(::Nothing) = TaskLocalRNG()
@@ -294,7 +297,7 @@ rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52{UInt64}})    = ran
 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt104{UInt128}})  = rand(r, UInt104Raw())
 
 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float16}}) =
-    Float16(Float32(rand(r, UInt16) >>> 5) * Float32(0x1.0p-11))
+    Float16(rand(r, UInt16) >>> 5) * Float16(0x1.0p-11)
 
 rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float32}}) =
     Float32(rand(r, UInt32) >>> 8) * Float32(0x1.0p-24)
diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl
index 6d4886f31d22b..1c5f8306cc302 100644
--- a/stdlib/Random/src/XoshiroSimd.jl
+++ b/stdlib/Random/src/XoshiroSimd.jl
@@ -44,6 +44,17 @@ simdThreshold(::Type{Bool}) = 640
     l = Float32(li >>> 8) * Float32(0x1.0p-24)
     (UInt64(reinterpret(UInt32, u)) << 32) | UInt64(reinterpret(UInt32, l))
 end
+@inline function _bits2float(x::UInt64, ::Type{Float16})
+    i1 = (x>>>48) % UInt16
+    i2 = (x>>>32) % UInt16
+    i3 = (x>>>16) % UInt16
+    i4 = x % UInt16
+    f1 = Float16(i1 >>> 5) * Float16(0x1.0p-11)
+    f2 = Float16(i2 >>> 5) * Float16(0x1.0p-11)
+    f3 = Float16(i3 >>> 5) * Float16(0x1.0p-11)
+    f4 = Float16(i4 >>> 5) * Float16(0x1.0p-11)
+    return (UInt64(reinterpret(UInt16, f1)) << 48) | (UInt64(reinterpret(UInt16, f2)) << 32) | (UInt64(reinterpret(UInt16, f3)) << 16) | UInt64(reinterpret(UInt16, f4))
+end
 
 # required operations. These could be written more concisely with `ntuple`, but the compiler
 # sometimes refuses to properly vectorize.
@@ -118,6 +129,18 @@ for N in [4,8,16]
         ret <$N x i64> %i
         """
         @eval @inline _bits2float(x::$VT, ::Type{Float32}) = llvmcall($code, $VT, Tuple{$VT}, x)
+
+        code = """
+        %as16 = bitcast <$N x i64> %0 to <$(4N) x i16>
+        %shiftamt = shufflevector <1 x i16> <i16 5>, <1 x i16> undef, <$(4N) x i32> zeroinitializer
+        %sh = lshr <$(4N) x i16> %as16, %shiftamt
+        %f = uitofp <$(4N) x i16> %sh to <$(4N) x half>
+        %scale = shufflevector <1 x half> <half 0x3f40000000000000>, <1 x half> undef, <$(4N) x i32> zeroinitializer
+        %m = fmul <$(4N) x half> %f, %scale
+        %i = bitcast <$(4N) x half> %m to <$N x i64>
+        ret <$N x i64> %i
+        """
+        @eval @inline _bits2float(x::$VT, ::Type{Float16}) = llvmcall($code, $VT, Tuple{$VT}, x)
     end
 end
 
@@ -137,7 +160,7 @@ end
 
 _id(x, T) = x
 
-@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F}
+@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float16}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F}
     if len >= simdThreshold(T)
         written = xoshiro_bulk_simd(rng, dst, len, T, Val(N), f)
         len -= written
@@ -265,13 +288,8 @@ end
 end
 
 
-function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float32}, ::SamplerTrivial{CloseOpen01{Float32}})
-    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*4, Float32, xoshiroWidth(), _bits2float)
-    dst
-end
-
-function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float64}, ::SamplerTrivial{CloseOpen01{Float64}})
-    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*8, Float64, xoshiroWidth(), _bits2float)
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{T}, ::SamplerTrivial{CloseOpen01{T}}) where {T<:Union{Float16,Float32,Float64}}
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*sizeof(T), T, xoshiroWidth(), _bits2float)
     dst
 end
 
diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl
index d8bb48d2764d2..b605dff9e5d80 100644
--- a/stdlib/Random/src/generation.jl
+++ b/stdlib/Random/src/generation.jl
@@ -66,7 +66,7 @@ function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat)
         limbs[end] |= Limb_high_bit
     end
     z.sign = 1
-    GC.@preserve limbs unsafe_copyto!(z.d, pointer(limbs), sp.nlimbs)
+    copyto!(z.d, limbs)
     randbool
 end
 
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl
index 7600457812f66..bc476181e5b0d 100644
--- a/stdlib/Serialization/src/Serialization.jl
+++ b/stdlib/Serialization/src/Serialization.jl
@@ -1570,11 +1570,11 @@ function deserialize(s::AbstractSerializer, ::Type{Task})
     t.storage = deserialize(s)
     state = deserialize(s)
     if state === :runnable
-        t._state = Base.task_state_runnable
+        @atomic :release t._state = Base.task_state_runnable
     elseif state === :done
-        t._state = Base.task_state_done
+        @atomic :release t._state = Base.task_state_done
     elseif state === :failed
-        t._state = Base.task_state_failed
+        @atomic :release t._state = Base.task_state_failed
     else
         @assert false
     end
diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl
index 5baf8826cc883..3c30b214305fb 100644
--- a/stdlib/Sockets/src/Sockets.jl
+++ b/stdlib/Sockets/src/Sockets.jl
@@ -450,7 +450,7 @@ function send(sock::UDPSocket, ipaddr::IPAddr, port::Integer, msg)
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(uvw) != C_NULL
             # uvw is still alive,
             # so make sure we won't get spurious notifications later
diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl
index 4ee9e07a58430..866a1684c85a1 100644
--- a/stdlib/Sockets/src/addrinfo.jl
+++ b/stdlib/Sockets/src/addrinfo.jl
@@ -90,7 +90,7 @@ function getalladdrinfo(host::String)
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we don't get spurious notifications later
@@ -223,7 +223,7 @@ function getnameinfo(address::Union{IPv4, IPv6})
     finally
         Base.sigatomic_end()
         iolock_begin()
-        ct.queue === nothing || Base.list_deletefirst!(ct.queue, ct)
+        q = ct.queue; q === nothing || Base.list_deletefirst!(q::IntrusiveLinkedList{Task}, ct)
         if uv_req_data(req) != C_NULL
             # req is still alive,
             # so make sure we don't get spurious notifications later
diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl
index 2c50b4a0f8b4a..778d9f7415bcc 100644
--- a/stdlib/Sockets/test/runtests.jl
+++ b/stdlib/Sockets/test/runtests.jl
@@ -453,6 +453,8 @@ end
         catch e
             if isa(e, Base.IOError) && Base.uverrorname(e.code) == "EPERM"
                 @warn "UDP IPv4 broadcast test skipped (permission denied upon send, restrictive firewall?)"
+            elseif Sys.isapple() && isa(e, Base.IOError) && Base.uverrorname(e.code) == "EHOSTUNREACH"
+                @warn "UDP IPv4 broadcast test skipped (local network access not granted?)"
             else
                 rethrow()
             end
diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version
index 7c99b8ba52d7c..019306a3e9f65 100644
--- a/stdlib/SparseArrays.version
+++ b/stdlib/SparseArrays.version
@@ -1,4 +1,4 @@
 SPARSEARRAYS_BRANCH = main
-SPARSEARRAYS_SHA1 = e61663ad0a79a48906b0b12d53506e731a614ab8
+SPARSEARRAYS_SHA1 = 0dd8d45d55b305458d0d3d3451057589b684f72f
 SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git
 SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1
diff --git a/stdlib/StyledStrings.version b/stdlib/StyledStrings.version
index 2067083aec74b..83fbece4c8bc0 100644
--- a/stdlib/StyledStrings.version
+++ b/stdlib/StyledStrings.version
@@ -1,4 +1,4 @@
 STYLEDSTRINGS_BRANCH = main
-STYLEDSTRINGS_SHA1 = d7496d24d3f05536bce6a7eb4cd8ca05a75c02aa
+STYLEDSTRINGS_SHA1 = f6035eb97b516862b16e36cab2ecc6ea8adc3d7c
 STYLEDSTRINGS_GIT_URL := https://github.com/JuliaLang/StyledStrings.jl.git
 STYLEDSTRINGS_TAR_URL = https://api.github.com/repos/JuliaLang/StyledStrings.jl/tarball/$1
diff --git a/stdlib/SuiteSparse_jll/Project.toml b/stdlib/SuiteSparse_jll/Project.toml
index 314208ffc344c..39b8447138a2d 100644
--- a/stdlib/SuiteSparse_jll/Project.toml
+++ b/stdlib/SuiteSparse_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "SuiteSparse_jll"
 uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
-version = "7.7.0+0"
+version = "7.8.0+0"
 
 [deps]
 libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
@@ -8,7 +8,7 @@ Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [compat]
-julia = "1.11"
+julia = "1.12"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl
index 7414b5dc686f4..b37a5ca83c251 100644
--- a/stdlib/TOML/src/TOML.jl
+++ b/stdlib/TOML/src/TOML.jl
@@ -25,7 +25,7 @@ module Internals
 end
 
 # https://github.com/JuliaLang/julia/issues/36605
-readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file")
+_readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file")
 
 """
     Parser()
@@ -36,19 +36,15 @@ explicitly create a `Parser` but instead one directly use use
 will however reuse some internal data structures which can be beneficial for
 performance if a larger number of small files are parsed.
 """
-const Parser = Internals.Parser
-
-"""
-    DTParser()
-
-Constructor for a TOML `Parser` which returns date and time objects from Dates.
-"""
-function DTParser(args...; kwargs...)
-    parser = Parser(args...; kwargs...)
-    parser.Dates = Dates
-    return parser
+struct Parser
+    _p::Internals.Parser{Dates}
 end
 
+# Dates-enabled constructors
+Parser() = Parser(Internals.Parser{Dates}())
+Parser(io::IO) = Parser(Internals.Parser{Dates}(io))
+Parser(str::String; filepath=nothing) = Parser(Internals.Parser{Dates}(str; filepath))
+
 """
     parsefile(f::AbstractString)
     parsefile(p::Parser, f::AbstractString)
@@ -59,9 +55,9 @@ Parse file `f` and return the resulting table (dictionary). Throw a
 See also [`TOML.tryparsefile`](@ref).
 """
 parsefile(f::AbstractString) =
-    Internals.parse(DTParser(readstring(f); filepath=abspath(f)))
+    Internals.parse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f)))
 parsefile(p::Parser, f::AbstractString) =
-    Internals.parse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
+    Internals.parse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f)))
 
 """
     tryparsefile(f::AbstractString)
@@ -73,9 +69,9 @@ Parse file `f` and return the resulting table (dictionary). Return a
 See also [`TOML.parsefile`](@ref).
 """
 tryparsefile(f::AbstractString) =
-    Internals.tryparse(DTParser(readstring(f); filepath=abspath(f)))
+    Internals.tryparse(Internals.Parser{Dates}(_readstring(f); filepath=abspath(f)))
 tryparsefile(p::Parser, f::AbstractString) =
-    Internals.tryparse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
+    Internals.tryparse(Internals.reinit!(p._p, _readstring(f); filepath=abspath(f)))
 
 """
     parse(x::Union{AbstractString, IO})
@@ -86,10 +82,11 @@ Throw a [`ParserError`](@ref) upon failure.
 
 See also [`TOML.tryparse`](@ref).
 """
+parse(p::Parser) = Internals.parse(p._p)
 parse(str::AbstractString) =
-    Internals.parse(DTParser(String(str)))
+    Internals.parse(Internals.Parser{Dates}(String(str)))
 parse(p::Parser, str::AbstractString) =
-    Internals.parse(Internals.reinit!(p, String(str)))
+    Internals.parse(Internals.reinit!(p._p, String(str)))
 parse(io::IO) = parse(read(io, String))
 parse(p::Parser, io::IO) = parse(p, read(io, String))
 
@@ -102,10 +99,11 @@ Return a [`ParserError`](@ref) upon failure.
 
 See also [`TOML.parse`](@ref).
 """
+tryparse(p::Parser) = Internals.tryparse(p._p)
 tryparse(str::AbstractString) =
-    Internals.tryparse(DTParser(String(str)))
+    Internals.tryparse(Internals.Parser{Dates}(String(str)))
 tryparse(p::Parser, str::AbstractString) =
-    Internals.tryparse(Internals.reinit!(p, String(str)))
+    Internals.tryparse(Internals.reinit!(p._p, String(str)))
 tryparse(io::IO) = tryparse(read(io, String))
 tryparse(p::Parser, io::IO) = tryparse(p, read(io, String))
 
@@ -137,4 +135,17 @@ supported type.
 """
 const print = Internals.Printer.print
 
+public Parser, parsefile, tryparsefile, parse, tryparse, ParserError, print
+
+# These methods are private Base interfaces, but we do our best to support them over
+# the TOML stdlib types anyway to minimize downstream breakage.
+Base.TOMLCache(p::Parser) = Base.TOMLCache(p._p, Dict{String, Base.CachedTOMLDict}())
+Base.TOMLCache(p::Parser, d::Base.CachedTOMLDict) = Base.TOMLCache(p._p, d)
+Base.TOMLCache(p::Parser, d::Dict{String, Dict{String, Any}}) = Base.TOMLCache(p._p, d)
+
+Internals.reinit!(p::Parser, str::String; filepath::Union{Nothing, String}=nothing) =
+    Internals.reinit!(p._p, str; filepath)
+Internals.parse(p::Parser) = Internals.parse(p._p)
+Internals.tryparse(p::Parser) = Internals.tryparse(p._p)
+
 end
diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl
index be2ed3acce5b5..53be1b04708b3 100644
--- a/stdlib/TOML/test/values.jl
+++ b/stdlib/TOML/test/values.jl
@@ -4,16 +4,31 @@ using Test
 using TOML
 using TOML: Internals
 
+# Construct an explicit Parser to test the "cached" version of parsing
+const test_parser = TOML.Parser()
+
 function testval(s, v)
     f = "foo = $s"
+    # First, test with the standard entrypoint
     parsed = TOML.parse(f)["foo"]
     return isequal(v, parsed) && typeof(v) == typeof(parsed)
+    (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false
+    # Next, test with the "cached" (explicit Parser) entrypoint
+    parsed = TOML.parse(test_parser, f)["foo"]
+    (!isequal(v, parsed) || typeof(v) != typeof(parsed)) && return false
+    return true
 end
 
 function failval(s, v)
     f = "foo = $s"
+    # First, test with the standard entrypoint
     err = TOML.tryparse(f);
     return err isa TOML.Internals.ParserError && err.type == v
+    (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false
+    # Next, test with the "cached" (explicit Parser) entrypoint
+    err = TOML.tryparse(test_parser, f);
+    (!isa(err, TOML.Internals.ParserError) || err.type != v) && return false
+    return true
 end
 
 @testset "Numbers" begin
@@ -157,6 +172,6 @@ end
 @testset "Array" begin
     @test testval("[1,2,3]", Int64[1,2,3])
     @test testval("[1.0, 2.0, 3.0]", Float64[1.0, 2.0, 3.0])
-    @test testval("[1.0, 2.0, 3]", Union{Int64, Float64}[1.0, 2.0, Int64(3)])
+    @test testval("[1.0, 2.0, 3]", Any[1.0, 2.0, Int64(3)])
     @test testval("[1.0, 2, \"foo\"]", Any[1.0, Int64(2), "foo"])
 end
diff --git a/stdlib/Tar.version b/stdlib/Tar.version
index f1c361eff972e..2403cd1c7c635 100644
--- a/stdlib/Tar.version
+++ b/stdlib/Tar.version
@@ -1,4 +1,4 @@
 TAR_BRANCH = master
-TAR_SHA1 = 81888a33704b233a2ad6f82f84456a1dd82c87f0
+TAR_SHA1 = 1114260f5c7a7b59441acadca2411fa227bb8a3b
 TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git
 TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index 3ecf0c151164d..46bc2d8790cec 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -1838,9 +1838,19 @@ function parse_testset_args(args)
         # a standalone symbol is assumed to be the test set we should use
         # the same is true for a symbol that's not exported from a module
         if isa(arg, Symbol) || Base.isexpr(arg, :.)
+            if testsettype !== nothing
+                msg = """Multiple testset types provided to @testset. \
+                    This is deprecated and may error in the future."""
+                Base.depwarn(msg, :testset_multiple_testset_types; force=true)
+            end
             testsettype = esc(arg)
         # a string is the description
         elseif isa(arg, AbstractString) || (isa(arg, Expr) && arg.head === :string)
+            if desc !== nothing
+                msg = """Multiple descriptions provided to @testset. \
+                    This is deprecated and may error in the future."""
+                Base.depwarn(msg, :testset_multiple_descriptions; force=true)
+            end
             desc = esc(arg)
         # an assignment is an option
         elseif isa(arg, Expr) && arg.head === :(=)
@@ -2077,7 +2087,7 @@ function detect_ambiguities(mods::Module...;
     while !isempty(work)
         mod = pop!(work)
         for n in names(mod, all = true)
-            Base.isdeprecated(mod, n) && continue
+            (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue
             if !isdefined(mod, n)
                 if is_in_mods(mod, recursive, mods)
                     if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
@@ -2148,7 +2158,7 @@ function detect_unbound_args(mods...;
     while !isempty(work)
         mod = pop!(work)
         for n in names(mod, all = true)
-            Base.isdeprecated(mod, n) && continue
+            (!Base.isbindingresolved(mod, n) || Base.isdeprecated(mod, n)) && continue
             if !isdefined(mod, n)
                 if is_in_mods(mod, recursive, mods)
                     if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
index 31919c2881f6b..3ddcd7d5de0fd 100644
--- a/stdlib/Test/test/runtests.jl
+++ b/stdlib/Test/test/runtests.jl
@@ -1725,4 +1725,14 @@ end
         result = read(pipeline(ignorestatus(cmd), stderr=devnull), String)
         @test occursin(expected, result)
     end
+
+end
+
+@testset "Deprecated multiple arguments" begin
+    msg1 = """Multiple descriptions provided to @testset. \
+        This is deprecated and may error in the future."""
+    @test_deprecated msg1 @macroexpand @testset "name1" "name2" begin end
+    msg2 = """Multiple testset types provided to @testset. \
+        This is deprecated and may error in the future."""
+    @test_deprecated msg2 @macroexpand @testset DefaultTestSet DefaultTestSet begin end
 end
diff --git a/stdlib/UUIDs/src/UUIDs.jl b/stdlib/UUIDs/src/UUIDs.jl
index 500f2767e8e96..e3f5f812ef6e2 100644
--- a/stdlib/UUIDs/src/UUIDs.jl
+++ b/stdlib/UUIDs/src/UUIDs.jl
@@ -39,7 +39,7 @@ const namespace_x500 = UUID(0x6ba7b8149dad11d180b400c04fd430c8) # 6ba7b814-9dad-
     uuid1([rng::AbstractRNG]) -> UUID
 
 Generates a version 1 (time-based) universally unique identifier (UUID), as specified
-by RFC 4122. Note that the Node ID is randomly generated (does not identify the host)
+by [RFC 4122](https://www.ietf.org/rfc/rfc4122). Note that the Node ID is randomly generated (does not identify the host)
 according to section 4.5 of the RFC.
 
 The default rng used by `uuid1` is not `Random.default_rng()` and every invocation of `uuid1()` without
@@ -62,6 +62,13 @@ UUID("cfc395e8-590f-11e8-1f13-43a2532b2fa8")
 ```
 """
 function uuid1(rng::AbstractRNG=Random.RandomDevice())
+    # 0x01b21dd213814000 is the number of 100 nanosecond intervals
+    # between the UUID epoch and Unix epoch
+    timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000
+    _build_uuid1(rng, timestamp)
+end
+
+function _build_uuid1(rng::AbstractRNG, timestamp::UInt64)
     u = rand(rng, UInt128)
 
     # mask off clock sequence and node
@@ -70,9 +77,6 @@ function uuid1(rng::AbstractRNG=Random.RandomDevice())
     # set the unicast/multicast bit and version
     u |= 0x00000000000010000000010000000000
 
-    # 0x01b21dd213814000 is the number of 100 nanosecond intervals
-    # between the UUID epoch and Unix epoch
-    timestamp = round(UInt64, time() * 1e7) + 0x01b21dd213814000
     ts_low = timestamp & typemax(UInt32)
     ts_mid = (timestamp >> 32) & typemax(UInt16)
     ts_hi = (timestamp >> 48) & 0x0fff
@@ -81,14 +85,14 @@ function uuid1(rng::AbstractRNG=Random.RandomDevice())
     u |= UInt128(ts_mid) << 80
     u |= UInt128(ts_hi) << 64
 
-    UUID(u)
+    return UUID(u)
 end
 
 """
     uuid4([rng::AbstractRNG]) -> UUID
 
 Generates a version 4 (random or pseudo-random) universally unique identifier (UUID),
-as specified by RFC 4122.
+as specified by [RFC 4122](https://www.ietf.org/rfc/rfc4122).
 
 The default rng used by `uuid4` is not `Random.default_rng()` and every invocation of `uuid4()` without
 an argument should be expected to return a unique identifier. Importantly, the outputs of
@@ -161,7 +165,7 @@ end
     uuid7([rng::AbstractRNG]) -> UUID
 
 Generates a version 7 (random or pseudo-random) universally unique identifier (UUID),
-as specified by RFC 9652.
+as specified by [RFC 9652](https://www.rfc-editor.org/rfc/rfc9562).
 
 The default rng used by `uuid7` is not `Random.default_rng()` and every invocation of `uuid7()` without
 an argument should be expected to return a unique identifier. Importantly, the outputs of
@@ -183,14 +187,18 @@ UUID("019026ca-e086-772a-9638-f7b8557cd282")
 ```
 """
 function uuid7(rng::AbstractRNG=Random.RandomDevice())
+    # current time in ms, rounded to an Integer
+    timestamp = round(UInt128, time() * 1e3)
+    _build_uuid7(rng, timestamp)
+end
+
+function _build_uuid7(rng::AbstractRNG, timestamp::UInt128)
     bytes = rand(rng, UInt128)
     # make space for the timestamp
     bytes &= 0x0000000000000fff3fffffffffffffff
     # version & variant
     bytes |= 0x00000000000070008000000000000000
 
-    # current time in ms, rounded to an Integer
-    timestamp = round(UInt128, time() * 1e3)
     bytes |= timestamp << UInt128(80)
 
     return UUID(bytes)
diff --git a/stdlib/UUIDs/test/runtests.jl b/stdlib/UUIDs/test/runtests.jl
index f9d8516554580..c6da441076ea8 100644
--- a/stdlib/UUIDs/test/runtests.jl
+++ b/stdlib/UUIDs/test/runtests.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, UUIDs, Random
-
+using UUIDs: _build_uuid1, _build_uuid7
 
 # results similar to Python builtin uuid
 # To reproduce the sequence
@@ -56,9 +56,22 @@ end
     @test u7 == UUID(UInt128(u7))
 end
 
-@testset "uuid4 & uuid7 RNG stability" begin
+@testset "Passing an RNG" begin
+    rng = Xoshiro(0)
+    @test uuid1(rng) isa UUID
+    @test uuid4(rng) isa UUID
+    @test uuid7(rng) isa UUID
+end
+
+@testset "uuid1, uuid4 & uuid7 RNG stability" begin
     @test uuid4(Xoshiro(0)) == uuid4(Xoshiro(0))
-    @test uuid7(Xoshiro(0)) == uuid7(Xoshiro(0))
+
+    time_uuid1 = rand(UInt64)
+    time_uuid7 = rand(UInt128)
+
+    # we need to go through the internal function to test RNG stability
+    @test _build_uuid1(Xoshiro(0), time_uuid1) == _build_uuid1(Xoshiro(0), time_uuid1)
+    @test _build_uuid7(Xoshiro(0), time_uuid7) == _build_uuid7(Xoshiro(0), time_uuid7)
 end
 
 @testset "Rejection of invalid UUID strings" begin
diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml
index f6d93dcb94042..a0eac13b3ab23 100644
--- a/stdlib/libLLVM_jll/Project.toml
+++ b/stdlib/libLLVM_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "libLLVM_jll"
 uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
-version = "17.0.6+4"
+version = "18.1.7+2"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml
index 1d24d2470db2d..1dd22b7fb8d40 100644
--- a/stdlib/libblastrampoline_jll/Project.toml
+++ b/stdlib/libblastrampoline_jll/Project.toml
@@ -1,6 +1,6 @@
 name = "libblastrampoline_jll"
 uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
-version = "5.10.1+0"
+version = "5.11.0+0"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
diff --git a/test/Makefile b/test/Makefile
index 1b9cb377c943d..6ebdd3c764fd5 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -24,6 +24,8 @@ EMBEDDING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/embedding" "CC=$(CC
 
 GCEXT_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/gcext" "CC=$(CC)"
 
+TRIMMING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(JULIAHOME)/usr/bin" "CC=$(CC)"
+
 default:
 
 $(TESTS):
@@ -66,6 +68,9 @@ embedding:
 gcext:
 	@$(MAKE) -C $(SRCDIR)/$@ check $(GCEXT_ARGS)
 
+trimming:
+	@$(MAKE) -C $(SRCDIR)/$@ check $(TRIMMING_ARGS)
+
 clangsa:
 	@$(MAKE) -C $(SRCDIR)/$@
 
@@ -73,5 +78,6 @@ clean:
 	@$(MAKE) -C embedding $@ $(EMBEDDING_ARGS)
 	@$(MAKE) -C gcext $@ $(GCEXT_ARGS)
 	@$(MAKE) -C llvmpasses $@
+	@$(MAKE) -C trimming $@ $(TRIMMING_ARGS)
 
-.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext clangsa clean
+.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) relocatedepot revise-relocatedepot embedding gcext trimming clangsa clean
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index ae530261527f7..f655d9abe423f 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -1436,6 +1436,31 @@ using .Main.OffsetArrays
     end
 end
 
+@testset "Check push!($a, $args...)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)),
+    args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo"))
+        orig = copy(a)
+        push!(a, args...)
+        @test length(a) == length(orig) + length(args)
+        @test a[axes(orig,1)] == orig
+        @test all(a[end-length(args)+1:end] .== args)
+end
+
+@testset "Check append!($a, $args)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1)),
+    args in (("eenie",), ("eenie", "minie"), ("eenie", "minie", "mo"))
+        orig = copy(a)
+        append!(a, args)
+        @test length(a) == length(orig) + length(args)
+        @test a[axes(orig,1)] == orig
+        @test all(a[end-length(args)+1:end] .== args)
+end
+
+@testset "Check sizehint!($a)" for
+    a in (["foo", "Bar"], SimpleArray(["foo", "Bar"]), SimpleArray{Any}(["foo", "Bar"]), OffsetVector(["foo", "Bar"], 0:1))
+        @test sizehint!(a, 10) === a
+end
+
 @testset "splatting into hvcat" begin
     t = (1, 2)
     @test [t...; 3 4] == [1 2; 3 4]
@@ -1784,6 +1809,9 @@ end
     @test_throws ArgumentError stack([1:3, 4:6]; dims=3)
     @test_throws ArgumentError stack(abs2, 1:3; dims=2)
 
+    @test stack(["hello", "world"]) isa Matrix{Char}
+    @test_throws DimensionMismatch stack(["hello", "world!"])  # had a bug in error printing
+
     # Empty
     @test_throws ArgumentError stack(())
     @test_throws ArgumentError stack([])
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index 748660cc9c981..2f8a4193cf592 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -97,10 +97,7 @@ ambig(x::Union{Char, Int16}) = 's'
 
 # Automatic detection of ambiguities
 
-const allowed_undefineds = Set([
-    GlobalRef(Base, :active_repl),
-    GlobalRef(Base, :active_repl_backend),
-])
+const allowed_undefineds = Set([])
 
 let Distributed = get(Base.loaded_modules,
                       Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"),
@@ -165,6 +162,22 @@ end
 ambs = detect_ambiguities(Ambig48312)
 @test length(ambs) == 4
 
+module UnboundAmbig55868
+    module B
+        struct C end
+        export C
+        Base.@deprecate_binding D C
+    end
+    using .B
+    export C, D
+end
+@test !Base.isbindingresolved(UnboundAmbig55868, :C)
+@test !Base.isbindingresolved(UnboundAmbig55868, :D)
+@test isempty(detect_unbound_args(UnboundAmbig55868))
+@test isempty(detect_ambiguities(UnboundAmbig55868))
+@test !Base.isbindingresolved(UnboundAmbig55868, :C)
+@test !Base.isbindingresolved(UnboundAmbig55868, :D)
+
 # Test that Core and Base are free of ambiguities
 # not using isempty so this prints more information when it fails
 @testset "detect_ambiguities" begin
@@ -450,4 +463,20 @@ cc46601(::Type{T}, x::Int) where {T<:AbstractString} = 7
 @test length(methods(cc46601, Tuple{Type{<:Integer}, Integer})) == 2
 @test length(Base.methods_including_ambiguous(cc46601, Tuple{Type{<:Integer}, Integer})) == 7
 
+# Issue #55231
+struct U55231{P} end
+struct V55231{P} end
+U55231(::V55231) = nothing
+(::Type{T})(::V55231) where {T<:U55231} = nothing
+@test length(methods(U55231)) == 2
+U55231(a, b) = nothing
+@test length(methods(U55231)) == 3
+struct S55231{P} end
+struct T55231{P} end
+(::Type{T})(::T55231) where {T<:S55231} = nothing
+S55231(::T55231) = nothing
+@test length(methods(S55231)) == 2
+S55231(a, b) = nothing
+@test length(methods(S55231)) == 3
+
 nothing
diff --git a/test/arrayops.jl b/test/arrayops.jl
index f4bb2dc7372f8..333b68e287c4c 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -308,6 +308,35 @@ end
     @test_throws ArgumentError dropdims(a, dims=4)
     @test_throws ArgumentError dropdims(a, dims=6)
 
+
+    a = rand(8, 7)
+    @test @inferred(insertdims(a, dims=1)) == @inferred(insertdims(a, dims=(1,))) == reshape(a, (1, 8, 7))
+    @test @inferred(insertdims(a, dims=3))  == @inferred(insertdims(a, dims=(3,))) == reshape(a, (8, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3)))  == reshape(a, (1, 8, 1, 7))
+    @test @inferred(insertdims(a, dims=(1, 2, 3)))  == reshape(a, (1, 1, 1, 8, 7))
+    @test @inferred(insertdims(a, dims=(1, 4)))  == reshape(a, (1, 8, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3, 5)))  == reshape(a, (1, 8, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 2, 4, 6)))  == reshape(a, (1, 1, 8, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3, 4, 6)))  == reshape(a, (1, 8, 1, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 4, 6, 3)))  == reshape(a, (1, 8, 1, 1, 7, 1))
+    @test @inferred(insertdims(a, dims=(1, 3, 5, 6)))  == reshape(a, (1, 8, 1, 7, 1, 1))
+
+    @test_throws ArgumentError insertdims(a, dims=(1, 1, 2, 3))
+    @test_throws ArgumentError insertdims(a, dims=(1, 2, 2, 3))
+    @test_throws ArgumentError insertdims(a, dims=(1, 2, 3, 3))
+    @test_throws UndefKeywordError insertdims(a)
+    @test_throws ArgumentError insertdims(a, dims=0)
+    @test_throws ArgumentError insertdims(a, dims=(1, 2, 1))
+    @test_throws ArgumentError insertdims(a, dims=4)
+    @test_throws ArgumentError insertdims(a, dims=6)
+
+    # insertdims and dropdims are inverses
+    b = rand(1,1,1,5,1,1,7)
+    for dims in [1, (1,), 2, (2,), 3, (3,), (1,3), (1,2,3), (1,2), (1,3,5), (1,2,5,6), (1,3,5,6), (1,3,5,6), (1,6,5,3)]
+        @test dropdims(insertdims(a; dims); dims) == a
+        @test insertdims(dropdims(b; dims); dims) == b
+    end
+
     sz = (5,8,7)
     A = reshape(1:prod(sz),sz...)
     @test A[2:6] == [2:6;]
@@ -562,32 +591,32 @@ end
     @test findall(!, m) == [k for (k,v) in pairs(m) if !v]
     @test findfirst(!iszero, a) == 2
     @test findfirst(a.==0) == 1
-    @test findfirst(a.==5) == nothing
+    @test findfirst(a.==5) === nothing
     @test findfirst(Dict(1=>false, 2=>true)) == 2
-    @test findfirst(Dict(1=>false)) == nothing
+    @test findfirst(Dict(1=>false)) === nothing
     @test findfirst(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findfirst(!isequal(1), [1,2,4,1,2,3,4]) == 2
     @test findfirst(isodd, [2,4,6,3,9,2,0]) == 4
-    @test findfirst(isodd, [2,4,6,2,0]) == nothing
+    @test findfirst(isodd, [2,4,6,2,0]) === nothing
     @test findnext(!iszero,a,4) == 4
     @test findnext(!iszero,a,5) == 6
     @test findnext(!iszero,a,1) == 2
     @test findnext(isequal(1),a,4) == 6
-    @test findnext(isequal(5),a,4) == nothing
+    @test findnext(isequal(5),a,4) === nothing
     @test findlast(!iszero, a) == 8
     @test findlast(a.==0) == 5
-    @test findlast(a.==5) == nothing
-    @test findlast(false) == nothing # test non-AbstractArray findlast
+    @test findlast(a.==5) === nothing
+    @test findlast(false) === nothing # test non-AbstractArray findlast
     @test findlast(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findlast(isodd, [2,4,6,3,9,2,0]) == 5
-    @test findlast(isodd, [2,4,6,2,0]) == nothing
+    @test findlast(isodd, [2,4,6,2,0]) === nothing
     @test findprev(!iszero,a,4) == 4
     @test findprev(!iszero,a,5) == 4
-    @test findprev(!iszero,a,1) == nothing
+    @test findprev(!iszero,a,1) === nothing
     @test findprev(isequal(1),a,4) == 2
     @test findprev(isequal(1),a,8) == 6
     @test findprev(isodd, [2,4,5,3,9,2,0], 7) == 5
-    @test findprev(isodd, [2,4,5,3,9,2,0], 2) == nothing
+    @test findprev(isodd, [2,4,5,3,9,2,0], 2) === nothing
     @test findfirst(isequal(0x00), [0x01, 0x00]) == 2
     @test findlast(isequal(0x00), [0x01, 0x00]) == 2
     @test findnext(isequal(0x00), [0x00, 0x01, 0x00], 2) == 3
@@ -3219,42 +3248,23 @@ end
     end
 end
 
-@testset "Wrapping Memory into Arrays with view and reshape" begin
-    mem::Memory{Int} = Memory{Int}(undef, 10) .= 11:20
-
-    @test_throws DimensionMismatch reshape(mem, 10, 10)
-    @test_throws DimensionMismatch reshape(mem, 5)
-    @test_throws BoundsError view(mem, 1:10, 1:10)
-    @test_throws BoundsError view(mem, 1:11)
-    @test_throws BoundsError view(mem, 3:11)
-    @test_throws BoundsError view(mem, 0:4)
-
-    @test @inferred(view(mem, 1:5))::Vector{Int} == 11:15
-    @test @inferred(view(mem, 1:2))::Vector{Int} == 11:12
-    @test @inferred(view(mem, 1:10))::Vector{Int} == 11:20
-    @test @inferred(view(mem, 3:8))::Vector{Int} == 13:18
-    @test @inferred(view(mem, 20:19))::Vector{Int} == []
-    @test @inferred(view(mem, -5:-7))::Vector{Int} == []
-    @test @inferred(view(mem, :))::Vector{Int} == mem
-    @test @inferred(reshape(mem, 5, 2))::Matrix{Int} == reshape(11:20, 5, 2)
+@testset "Wrapping Memory into Arrays" begin
+    mem = Memory{Int}(undef, 10) .= 1
+    memref = memoryref(mem)
+    @test_throws DimensionMismatch Base.wrap(Array, mem, (10, 10))
+    @test Base.wrap(Array, mem, (5,)) == ones(Int, 5)
+    @test Base.wrap(Array, mem, 2) == ones(Int, 2)
+    @test Base.wrap(Array, memref, 10) == ones(Int, 10)
+    @test Base.wrap(Array, memref, (2,2,2)) == ones(Int,2,2,2)
+    @test Base.wrap(Array, mem, (5, 2)) == ones(Int, 5, 2)
 
-    # 53990
-    @test @inferred(view(mem, unsigned(1):10))::Vector{Int} == 11:20
-
-    empty_mem = Memory{Module}(undef, 0)
-    @test_throws BoundsError view(empty_mem, 0:1)
-    @test_throws BoundsError view(empty_mem, 1:2)
-    @test_throws DimensionMismatch reshape(empty_mem, 1)
-    @test_throws DimensionMismatch reshape(empty_mem, 1, 2, 3)
-    @test_throws ArgumentError reshape(empty_mem, 2^16, 2^16, 2^16, 2^16)
-
-    @test @inferred(view(empty_mem, 1:0))::Vector{Module} == []
-    @test @inferred(view(empty_mem, 10:3))::Vector{Module} == []
-    @test @inferred(view(empty_mem, :))::Vector{Module} == empty_mem
-    @test isempty(@inferred(reshape(empty_mem, 0, 7, 1))::Array{Module, 3})
-
-    offset_inds = OffsetArrays.IdOffsetRange(values=3:6, indices=53:56)
-    @test @inferred(view(collect(mem), offset_inds)) == view(mem, offset_inds)
+    memref2 = memoryref(mem, 3)
+    @test Base.wrap(Array, memref2, (5,)) == ones(Int, 5)
+    @test Base.wrap(Array, memref2, 2) == ones(Int, 2)
+    @test Base.wrap(Array, memref2, (2,2,2)) == ones(Int,2,2,2)
+    @test Base.wrap(Array, memref2, (3, 2)) == ones(Int, 3, 2)
+    @test_throws DimensionMismatch Base.wrap(Array, memref2, 9)
+    @test_throws DimensionMismatch Base.wrap(Array, memref2, 10)
 end
 
 @testset "Memory size" begin
@@ -3265,3 +3275,9 @@ end
     @test size(mem, 2) == 1
     @test size(mem, 0x2) == 1
 end
+
+@testset "MemoryRef" begin
+    mem = Memory{Float32}(undef, 3)
+    ref = memoryref(mem, 2)
+    @test parent(ref) === mem
+end
diff --git a/test/atomics.jl b/test/atomics.jl
index 3df9e7d0f63c0..adfe4c87138cd 100644
--- a/test/atomics.jl
+++ b/test/atomics.jl
@@ -129,6 +129,7 @@ test_field_operators(ARefxy{Any}(123_10, 123_20))
 test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
 test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
 test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
+test_field_operators(ARefxy{Complex{Real}}(123_10, 123_20))
 test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
 test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
 #FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
@@ -317,6 +318,8 @@ test_field_orderings(ARefxy{Any}(true, false), true, false)
 test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
 test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
 test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_field_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
+test_field_orderings(Complex{Rational{Integer}}(10, 30), Complex{Rational{Integer}}(20, 40))
 test_field_orderings(10.0, 20.0)
 test_field_orderings(NaN, Inf)
 
@@ -568,6 +571,7 @@ test_global_operators(Any)
 test_global_operators(Union{Nothing,Int})
 test_global_operators(Complex{Int32})
 test_global_operators(Complex{Int128})
+test_global_operators(Complex{Real})
 test_global_operators(PadIntA)
 test_global_operators(PadIntB)
 #FIXME: test_global_operators(Int24)
@@ -691,6 +695,7 @@ test_global_orderings(Any, true, false)
 test_global_orderings(Union{Nothing,Missing}, nothing, missing)
 test_global_orderings(Union{Nothing,Int}, nothing, 123_1)
 test_global_orderings(Complex{Int128}, Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_global_orderings(Complex{Real}, Complex{Real}(10, 30), Complex{Real}(20, 40))
 test_global_orderings(Float64, 10.0, 20.0)
 test_global_orderings(Float64, NaN, Inf)
 
@@ -844,6 +849,7 @@ test_memory_operators(Any)
 test_memory_operators(Union{Nothing,Int})
 test_memory_operators(Complex{Int32})
 test_memory_operators(Complex{Int128})
+test_memory_operators(Complex{Real})
 test_memory_operators(PadIntA)
 test_memory_operators(PadIntB)
 #FIXME: test_memory_operators(Int24)
@@ -1031,6 +1037,7 @@ test_memory_orderings(Any, true, false)
 test_memory_orderings(Union{Nothing,Missing}, nothing, missing)
 test_memory_orderings(Union{Nothing,Int}, nothing, 123_1)
 test_memory_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_memory_orderings(Complex{Real}(10, 30), Complex{Real}(20, 40))
 test_memory_orderings(10.0, 20.0)
 test_memory_orderings(NaN, Inf)
 
diff --git a/test/bitarray.jl b/test/bitarray.jl
index 2cf285370441e..67d8fae0eda6d 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -1357,11 +1357,11 @@ timesofar("find")
     @test findprev(b1, 777)  == findprevnot(b2, 777)  == findprev(!, b2, 777)  == 777
     @test findprev(b1, 776)  == findprevnot(b2, 776)  == findprev(!, b2, 776)  == 77
     @test findprev(b1, 77)   == findprevnot(b2, 77)   == findprev(!, b2, 77)   == 77
-    @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   == nothing
-    @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   == nothing
-    @test findprev(identity, b1, -1) == nothing
-    @test findprev(Returns(false), b1, -1) == nothing
-    @test findprev(Returns(true), b1, -1) == nothing
+    @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   === nothing
+    @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   === nothing
+    @test findprev(identity, b1, -1) === nothing
+    @test findprev(Returns(false), b1, -1) === nothing
+    @test findprev(Returns(true), b1, -1) === nothing
     @test_throws BoundsError findnext(b1, -1)
     @test_throws BoundsError findnextnot(b2, -1)
     @test_throws BoundsError findnext(!, b2, -1)
@@ -1372,28 +1372,28 @@ timesofar("find")
     @test findnext(b1, 77)   == findnextnot(b2, 77)   == findnext(!, b2, 77)   == 77
     @test findnext(b1, 78)   == findnextnot(b2, 78)   == findnext(!, b2, 78)   == 777
     @test findnext(b1, 777)  == findnextnot(b2, 777)  == findnext(!, b2, 777)  == 777
-    @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  == nothing
-    @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) == nothing
-    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) == nothing
+    @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  === nothing
+    @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) === nothing
+    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) === nothing
 
     @test findlast(b1) == Base.findlastnot(b2) == 777
     @test findfirst(b1) == Base.findfirstnot(b2) == 77
 
     b0 = BitVector()
-    @test findprev(Returns(true), b0, -1) == nothing
+    @test findprev(Returns(true), b0, -1) === nothing
     @test_throws BoundsError findprev(Returns(true), b0, 1)
     @test_throws BoundsError findnext(Returns(true), b0, -1)
-    @test findnext(Returns(true), b0, 1) == nothing
+    @test findnext(Returns(true), b0, 1) === nothing
 
     b1 = falses(10)
     @test findprev(Returns(true), b1, 5) == 5
     @test findnext(Returns(true), b1, 5) == 5
-    @test findprev(Returns(true), b1, -1) == nothing
-    @test findnext(Returns(true), b1, 11) == nothing
-    @test findprev(Returns(false), b1, 5) == nothing
-    @test findnext(Returns(false), b1, 5) == nothing
-    @test findprev(Returns(false), b1, -1) == nothing
-    @test findnext(Returns(false), b1, 11) == nothing
+    @test findprev(Returns(true), b1, -1) === nothing
+    @test findnext(Returns(true), b1, 11) === nothing
+    @test findprev(Returns(false), b1, 5) === nothing
+    @test findnext(Returns(false), b1, 5) === nothing
+    @test findprev(Returns(false), b1, -1) === nothing
+    @test findnext(Returns(false), b1, 11) === nothing
     @test_throws BoundsError findprev(Returns(true), b1, 11)
     @test_throws BoundsError findnext(Returns(true), b1, -1)
 
@@ -1415,7 +1415,7 @@ timesofar("find")
     for l = [1, 63, 64, 65, 127, 128, 129]
         f = falses(l)
         t = trues(l)
-        @test findprev(f, l) == findprevnot(t, l) == nothing
+        @test findprev(f, l) == findprevnot(t, l) === nothing
         @test findprev(t, l) == findprevnot(f, l) == l
         b1 = falses(l)
         b1[end] = true
diff --git a/test/broadcast.jl b/test/broadcast.jl
index e4309bf81419f..b2232258744ac 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -979,6 +979,10 @@ end
     @test sum(bc, dims=1, init=0) == [5]
     bc = Broadcast.instantiate(Broadcast.broadcasted(*, ['a','b'], 'c'))
     @test prod(bc, dims=1, init="") == ["acbc"]
+
+    a = rand(-10:10,32,4); b = rand(-10:10,32,4)
+    bc = Broadcast.instantiate(Broadcast.broadcasted(+,a,b))
+    @test sum(bc; dims = 1, init = 0.0) == sum(collect(bc); dims = 1, init = 0.0)
 end
 
 # treat Pair as scalar:
diff --git a/test/ccall.jl b/test/ccall.jl
index a406af46f0c34..b10504de21abc 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -1937,7 +1937,10 @@ end
 
 # issue #52025
 @test Base.unsafe_convert(Ptr{Ptr{Cchar}}, Base.cconvert(Ptr{Ptr{Cchar}}, map(pointer, ["ab"]))) isa Ptr{Ptr{Cchar}}
-
+#issue #54725
+for A in (reinterpret(UInt, [0]), reshape([0, 0], 1, 2))
+    @test pointer(A) == Base.unsafe_convert(Ptr{Cvoid}, A) == Base.unsafe_convert(Ptr{Int}, A)
+end
 # Cglobal with non-static symbols doesn't error
 function cglobal_non_static1()
     sym = (:global_var, libccalltest)
diff --git a/test/channels.jl b/test/channels.jl
index f1642de1b7bec..eed7a7ecc0566 100644
--- a/test/channels.jl
+++ b/test/channels.jl
@@ -12,6 +12,9 @@ using Base: n_avail
     end
     @test wait(a) == "success"
     @test fetch(t) == "finished"
+
+    # Test printing
+    @test repr(a) == "Condition()"
 end
 
 @testset "wait first behavior of wait on Condition" begin
@@ -382,7 +385,7 @@ end
         """error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")""", output))
     # test for invalid state in Workqueue during yield
     t = @async nothing
-    t._state = 66
+    @atomic t._state = 66
     newstderr = redirect_stderr()
     try
         errstream = @async read(newstderr[1], String)
@@ -500,7 +503,7 @@ end
     c = Channel(1)
     close(c)
     @test !isopen(c)
-    c.excp == nothing # to trigger the branch
+    c.excp === nothing # to trigger the branch
     @test_throws InvalidStateException Base.check_channel_state(c)
 end
 
diff --git a/test/char.jl b/test/char.jl
index 5da92121b1630..3100add0e81c5 100644
--- a/test/char.jl
+++ b/test/char.jl
@@ -121,7 +121,7 @@ end
     #iterate(c::Char)
     for x in testarrays
         @test iterate(x)[1] == x
-        @test iterate(x, iterate(x)[2]) == nothing
+        @test iterate(x, iterate(x)[2]) === nothing
     end
 
     #isless(x::Char, y::Integer) = isless(UInt32(x), y)
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index 01a8acaeaea94..cc3f8950f0dc0 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -339,43 +339,37 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test errors_not_signals(`$exename -C invalidtarget`)
     @test errors_not_signals(`$exename --cpu-target=invalidtarget`)
 
-    if Sys.iswindows()
-        # -t, --threads
-        code = "print(Threads.threadpoolsize())"
-        cpu_threads = ccall(:jl_effective_threads, Int32, ())
-        @test string(cpu_threads) ==
-            read(`$exename --threads auto -e $code`, String) ==
-            read(`$exename --threads=auto -e $code`, String) ==
-            read(`$exename -tauto -e $code`, String) ==
-            read(`$exename -t auto -e $code`, String)
-        for nt in (nothing, "1")
-            withenv("JULIA_NUM_THREADS" => nt) do
-                @test read(`$exename --threads=2 -e $code`, String) ==
-                    read(`$exename -t 2 -e $code`, String) == "2"
-            end
-        end
-        # We want to test oversubscription, but on manycore machines, this can
-        # actually exhaust limited PID spaces
-        cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads))
-        if Sys.WORD_SIZE == 32
-            cpu_threads = min(cpu_threads, 50)
-        end
-        @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads)
-        withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do
-            @test read(`$exename -e $code`, String) == string(cpu_threads)
+    # -t, --threads
+    code = "print(Threads.threadpoolsize())"
+    cpu_threads = ccall(:jl_effective_threads, Int32, ())
+    @test string(cpu_threads) ==
+        read(`$exename --threads auto -e $code`, String) ==
+        read(`$exename --threads=auto -e $code`, String) ==
+        read(`$exename -tauto -e $code`, String) ==
+        read(`$exename -t auto -e $code`, String)
+    for nt in (nothing, "1")
+        withenv("JULIA_NUM_THREADS" => nt) do
+            @test read(`$exename --threads=2 -e $code`, String) ==
+                read(`$exename -t 2 -e $code`, String) == "2"
         end
-        @test errors_not_signals(`$exename -t 0`)
-        @test errors_not_signals(`$exename -t -1`)
+    end
+    # We want to test oversubscription, but on manycore machines, this can
+    # actually exhaust limited PID spaces
+    cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads))
+    if Sys.WORD_SIZE == 32
+        cpu_threads = min(cpu_threads, 50)
+    end
+    @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads)
+    withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do
+        @test read(`$exename -e $code`, String) == string(cpu_threads)
+    end
+    @test errors_not_signals(`$exename -t 0`)
+    @test errors_not_signals(`$exename -t -1`)
 
-        # Combining --threads and --procs: --threads does propagate
-        withenv("JULIA_NUM_THREADS" => nothing) do
-            code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))"
-            @test read(`$exename -p2 -t2 -e $code`, String) == "6"
-        end
-    else
-        @test_skip "Command line tests with -t are flakey on non-Windows OS"
-        # Known issue: https://github.com/JuliaLang/julia/issues/49154
-        # These tests should be fixed and reenabled on all operating systems.
+    # Combining --threads and --procs: --threads does propagate
+    withenv("JULIA_NUM_THREADS" => nothing) do
+        code = "print(sum(remotecall_fetch(Threads.threadpoolsize, x) for x in procs()))"
+        @test read(`$exename -p2 -t2 -e $code`, String) == "6"
     end
 
     # Combining --threads and invalid -C should yield a decent error
@@ -793,6 +787,17 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # tested in test/parallel.jl)
     @test errors_not_signals(`$exename --worker=true`)
 
+    # --trace-compile
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
+            "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+            `$exename --trace-compile=stderr -i`,
+            stderr=io)
+        _stderr = String(take!(io))
+        @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr)
+    end
+
     # --trace-compile-timing
     let
         io = IOBuffer()
@@ -804,6 +809,17 @@ let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
         @test occursin(" ms =# precompile(Tuple{typeof(Main.foo), Int", _stderr)
     end
 
+    # --trace-dispatch
+    let
+        io = IOBuffer()
+        v = writereadpipeline(
+            "foo(x) = begin Base.Experimental.@force_compile; x; end; foo(1)",
+            `$exename --trace-dispatch=stderr -i`,
+            stderr=io)
+        _stderr = String(take!(io))
+        @test occursin("precompile(Tuple{typeof(Main.foo), Int", _stderr)
+    end
+
     # test passing arguments
     mktempdir() do dir
         testfile, io = mktemp(dir)
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
index 0d475a8259000..009128b289ade 100644
--- a/test/compiler/AbstractInterpreter.jl
+++ b/test/compiler/AbstractInterpreter.jl
@@ -176,7 +176,6 @@ end == Val{6}
 @newinterp Issue48097Interp
 @MethodTable ISSUE_48097_MT
 CC.method_table(interp::Issue48097Interp) = CC.OverlayMethodTable(CC.get_inference_world(interp), ISSUE_48097_MT)
-CC.InferenceParams(::Issue48097Interp) = CC.InferenceParams(; unoptimize_throw_blocks=false)
 function CC.concrete_eval_eligible(interp::Issue48097Interp,
     @nospecialize(f), result::CC.MethodCallResult, arginfo::CC.ArgInfo, sv::CC.AbsIntState)
     ret = @invoke CC.concrete_eval_eligible(interp::CC.AbstractInterpreter,
@@ -410,15 +409,19 @@ end
 CC.nsplit_impl(info::NoinlineCallInfo) = CC.nsplit(info.info)
 CC.getsplit_impl(info::NoinlineCallInfo, idx::Int) = CC.getsplit(info.info, idx)
 CC.getresult_impl(info::NoinlineCallInfo, idx::Int) = CC.getresult(info.info, idx)
+CC.add_uncovered_edges_impl(edges::Vector{Any}, info::NoinlineCallInfo, @nospecialize(atype)) = CC.add_uncovered_edges!(edges, info.info, atype)
 
 function CC.abstract_call(interp::NoinlineInterpreter,
     arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
     ret = @invoke CC.abstract_call(interp::CC.AbstractInterpreter,
         arginfo::CC.ArgInfo, si::CC.StmtInfo, sv::CC.InferenceState, max_methods::Int)
-    if sv.mod in noinline_modules(interp)
-        return CC.CallMeta(ret.rt, ret.exct, ret.effects, NoinlineCallInfo(ret.info))
+    return CC.Future{CC.CallMeta}(ret, interp, sv) do ret, interp, sv
+        if sv.mod in noinline_modules(interp)
+            (;rt, exct, effects, info) = ret
+            return CC.CallMeta(rt, exct, effects, NoinlineCallInfo(info))
+        end
+        return ret
     end
-    return ret
 end
 function CC.src_inlining_policy(interp::NoinlineInterpreter,
     @nospecialize(src), @nospecialize(info::CallInfo), stmt_flag::UInt32)
@@ -432,6 +435,8 @@ end
 @inline function inlined_usually(x, y, z)
     return x * y + z
 end
+foo_split(x::Float64) = 1
+foo_split(x::Int) = 2
 
 # check if the inlining algorithm works as expected
 let src = code_typed1((Float64,Float64,Float64)) do x, y, z
@@ -445,6 +450,7 @@ let NoinlineModule = Module()
     main_func(x, y, z) = inlined_usually(x, y, z)
     @eval NoinlineModule noinline_func(x, y, z) = $inlined_usually(x, y, z)
     @eval OtherModule other_func(x, y, z) = $inlined_usually(x, y, z)
+    @eval NoinlineModule bar_split_error() = $foo_split(Core.compilerbarrier(:type, nothing))
 
     interp = NoinlineInterpreter(Set((NoinlineModule,)))
 
@@ -474,12 +480,12 @@ let NoinlineModule = Module()
         @test count(isinvoke(:inlined_usually), src.code) == 0
         @test count(iscall((src, inlined_usually)), src.code) == 0
     end
-end
 
-# Make sure that Core.Compiler has enough NamedTuple infrastructure
-# to properly give error messages for basic kwargs...
-Core.eval(Core.Compiler, quote f(;a=1) = a end)
-@test_throws MethodError Core.Compiler.f(;b=2)
+    let src = code_typed1(NoinlineModule.bar_split_error)
+        @test count(iscall((src, foo_split)), src.code) == 0
+        @test count(iscall((src, Core.throw_methoderror)), src.code) > 0
+    end
+end
 
 # custom inferred data
 # ====================
diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl
index 188ec93ebc5be..b8ad4589db626 100644
--- a/test/compiler/EscapeAnalysis/EAUtils.jl
+++ b/test/compiler/EscapeAnalysis/EAUtils.jl
@@ -116,12 +116,14 @@ CC.get_inference_world(interp::EscapeAnalyzer) = interp.world
 CC.get_inference_cache(interp::EscapeAnalyzer) = interp.inf_cache
 CC.cache_owner(::EscapeAnalyzer) = EAToken()
 
-function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::InferenceResult)
+function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, opt::OptimizationState,
+                                   ir::IRCode, caller::InferenceResult)
     # run EA on all frames that have been optimized
-    nargs = let def = caller.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end
+    nargs = Int(opt.src.nargs)
+    𝕃ₒ = CC.optimizer_lattice(interp)
     get_escape_cache = GetEscapeCache(interp)
     estate = try
-        analyze_escapes(ir, nargs, CC.optimizer_lattice(interp), get_escape_cache)
+        analyze_escapes(ir, nargs, 𝕃ₒ, get_escape_cache)
     catch err
         @error "error happened within EA, inspect `Main.failed_escapeanalysis`"
         Main.failed_escapeanalysis = FailedAnalysis(ir, nargs, get_escape_cache)
@@ -133,7 +135,8 @@ function CC.ipo_dataflow_analysis!(interp::EscapeAnalyzer, ir::IRCode, caller::I
     end
     record_escapes!(interp, caller, estate, ir)
 
-    @invoke CC.ipo_dataflow_analysis!(interp::AbstractInterpreter, ir::IRCode, caller::InferenceResult)
+    @invoke CC.ipo_dataflow_analysis!(interp::AbstractInterpreter, opt::OptimizationState,
+                                      ir::IRCode, caller::InferenceResult)
 end
 
 function record_escapes!(interp::EscapeAnalyzer,
diff --git a/test/compiler/EscapeAnalysis/EscapeAnalysis.jl b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
index d8ea8be21fe07..9afe49c01562d 100644
--- a/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
+++ b/test/compiler/EscapeAnalysis/EscapeAnalysis.jl
@@ -290,7 +290,7 @@ end
 
     let # typeassert
         result = code_escapes((Any,)) do x
-            y = x::String
+            y = x::Base.RefValue{Any}
             return y
         end
         r = only(findall(isreturn, result.ir.stmts.stmt))
@@ -305,11 +305,6 @@ end
         r = only(findall(isreturn, result.ir.stmts.stmt))
         @test has_return_escape(result.state[Argument(2)], r)
         @test !has_all_escape(result.state[Argument(2)])
-
-        result = code_escapes((Module,)) do m
-            isdefined(m, 10) # throws
-        end
-        @test has_thrown_escape(result.state[Argument(2)])
     end
 end
 
@@ -685,8 +680,8 @@ end
         @test has_all_escape(result.state[Argument(2)])
     end
     let result = @eval EATModule() begin
-            const Rx = SafeRef{String}("Rx")
-            $code_escapes((String,)) do s
+            const Rx = SafeRef{Any}(nothing)
+            $code_escapes((Base.RefValue{String},)) do s
                 setfield!(Rx, :x, s)
                 Core.sizeof(Rx[])
             end
@@ -712,7 +707,7 @@ end
     # ------------
 
     # field escape should propagate to :new arguments
-    let result = code_escapes((String,)) do a
+    let result = code_escapes((Base.RefValue{String},)) do a
             o = SafeRef(a)
             Core.donotdelete(o)
             return o[]
@@ -722,7 +717,7 @@ end
         @test has_return_escape(result.state[Argument(2)], r)
         @test is_load_forwardable(result.state[SSAValue(i)])
     end
-    let result = code_escapes((String,)) do a
+    let result = code_escapes((Base.RefValue{String},)) do a
             t = SafeRef((a,))
             f = t[][1]
             return f
@@ -731,9 +726,8 @@ end
         r = only(findall(isreturn, result.ir.stmts.stmt))
         @test has_return_escape(result.state[Argument(2)], r)
         @test is_load_forwardable(result.state[SSAValue(i)])
-        result.state[SSAValue(i)].AliasInfo
     end
-    let result = code_escapes((String, String)) do a, b
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String})) do a, b
             obj = SafeRefs(a, b)
             Core.donotdelete(obj)
             fld1 = obj[1]
@@ -748,31 +742,31 @@ end
     end
 
     # field escape should propagate to `setfield!` argument
-    let result = code_escapes((String,)) do a
-            o = SafeRef("foo")
+    let result = code_escapes((Base.RefValue{String},)) do a
+            o = SafeRef(Ref("foo"))
             Core.donotdelete(o)
             o[] = a
             return o[]
         end
-        i = only(findall(isnew, result.ir.stmts.stmt))
+        i = last(findall(isnew, result.ir.stmts.stmt))
         r = only(findall(isreturn, result.ir.stmts.stmt))
         @test has_return_escape(result.state[Argument(2)], r)
         @test is_load_forwardable(result.state[SSAValue(i)])
     end
     # propagate escape information imposed on return value of `setfield!` call
-    let result = code_escapes((String,)) do a
-            obj = SafeRef("foo")
+    let result = code_escapes((Base.RefValue{String},)) do a
+            obj = SafeRef(Ref("foo"))
             Core.donotdelete(obj)
             return (obj[] = a)
         end
-        i = only(findall(isnew, result.ir.stmts.stmt))
+        i = last(findall(isnew, result.ir.stmts.stmt))
         r = only(findall(isreturn, result.ir.stmts.stmt))
         @test has_return_escape(result.state[Argument(2)], r)
         @test is_load_forwardable(result.state[SSAValue(i)])
     end
 
     # nested allocations
-    let result = code_escapes((String,)) do a
+    let result = code_escapes((Base.RefValue{String},)) do a
             o1 = SafeRef(a)
             o2 = SafeRef(o1)
             return o2[]
@@ -787,7 +781,7 @@ end
             end
         end
     end
-    let result = code_escapes((String,)) do a
+    let result = code_escapes((Base.RefValue{String},)) do a
             o1 = (a,)
             o2 = (o1,)
             return o2[1]
@@ -802,7 +796,7 @@ end
             end
         end
     end
-    let result = code_escapes((String,)) do a
+    let result = code_escapes((Base.RefValue{String},)) do a
             o1  = SafeRef(a)
             o2  = SafeRef(o1)
             o1′ = o2[]
@@ -844,7 +838,7 @@ end
             @test has_return_escape(result.state[SSAValue(i)], r)
         end
     end
-    let result = code_escapes((String,)) do x
+    let result = code_escapes((Base.RefValue{String},)) do x
             o = Ref(x)
             Core.donotdelete(o)
             broadcast(identity, o)
@@ -892,7 +886,7 @@ end
         end
     end
     # when ϕ-node merges values with different types
-    let result = code_escapes((Bool,String,String,String)) do cond, x, y, z
+    let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},Base.RefValue{String})) do cond, x, y, z
             local out
             if cond
                 ϕ = SafeRef(x)
@@ -904,7 +898,7 @@ end
         end
         r = only(findall(isreturn, result.ir.stmts.stmt))
         t = only(findall(iscall((result.ir, throw)), result.ir.stmts.stmt))
-        ϕ = only(findall(==(Union{SafeRef{String},SafeRefs{String,String}}), result.ir.stmts.type))
+        ϕ = only(findall(==(Union{SafeRef{Base.RefValue{String}},SafeRefs{Base.RefValue{String},Base.RefValue{String}}}), result.ir.stmts.type))
         @test has_return_escape(result.state[Argument(3)], r) # x
         @test !has_return_escape(result.state[Argument(4)], r) # y
         @test has_return_escape(result.state[Argument(5)], r) # z
@@ -1038,7 +1032,7 @@ end
     end
     # alias via typeassert
     let result = code_escapes((Any,)) do a
-            r = a::String
+            r = a::Base.RefValue{String}
             return r
         end
         r = only(findall(isreturn, result.ir.stmts.stmt))
@@ -1077,11 +1071,11 @@ end
         @test has_all_escape(result.state[Argument(3)]) # a
     end
     # alias via ϕ-node
-    let result = code_escapes((Bool,String)) do cond, x
+    let result = code_escapes((Bool,Base.RefValue{String})) do cond, x
             if cond
-                ϕ2 = ϕ1 = SafeRef("foo")
+                ϕ2 = ϕ1 = SafeRef(Ref("foo"))
             else
-                ϕ2 = ϕ1 = SafeRef("bar")
+                ϕ2 = ϕ1 = SafeRef(Ref("bar"))
             end
             ϕ2[] = x
             return ϕ1[]
@@ -1094,14 +1088,16 @@ end
             @test is_load_forwardable(result.state[SSAValue(i)])
         end
         for i in findall(isnew, result.ir.stmts.stmt)
-            @test is_load_forwardable(result.state[SSAValue(i)])
+            if result.ir[SSAValue(i)][:type] <: SafeRef
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
         end
     end
-    let result = code_escapes((Bool,Bool,String)) do cond1, cond2, x
+    let result = code_escapes((Bool,Bool,Base.RefValue{String})) do cond1, cond2, x
             if cond1
-                ϕ2 = ϕ1 = SafeRef("foo")
+                ϕ2 = ϕ1 = SafeRef(Ref("foo"))
             else
-                ϕ2 = ϕ1 = SafeRef("bar")
+                ϕ2 = ϕ1 = SafeRef(Ref("bar"))
             end
             cond2 && (ϕ2[] = x)
             return ϕ1[]
@@ -1114,12 +1110,14 @@ end
             @test is_load_forwardable(result.state[SSAValue(i)])
         end
         for i in findall(isnew, result.ir.stmts.stmt)
-            @test is_load_forwardable(result.state[SSAValue(i)])
+            if result.ir[SSAValue(i)][:type] <: SafeRef
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
         end
     end
     # alias via π-node
     let result = code_escapes((Any,)) do x
-            if isa(x, String)
+            if isa(x, Base.RefValue{String})
                 return x
             end
             throw("error!")
@@ -1213,7 +1211,7 @@ end
 
     # conservatively handle unknown field:
     # all fields should be escaped, but the allocation itself doesn't need to be escaped
-    let result = code_escapes((String, Symbol)) do a, fld
+    let result = code_escapes((Base.RefValue{String}, Symbol)) do a, fld
             obj = SafeRef(a)
             return getfield(obj, fld)
         end
@@ -1222,7 +1220,7 @@ end
         @test has_return_escape(result.state[Argument(2)], r) # a
         @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
     end
-    let result = code_escapes((String, String, Symbol)) do a, b, fld
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Symbol)) do a, b, fld
             obj = SafeRefs(a, b)
             return getfield(obj, fld) # should escape both `a` and `b`
         end
@@ -1232,7 +1230,7 @@ end
         @test has_return_escape(result.state[Argument(3)], r) # b
         @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
     end
-    let result = code_escapes((String, String, Int)) do a, b, idx
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Int)) do a, b, idx
             obj = SafeRefs(a, b)
             return obj[idx] # should escape both `a` and `b`
         end
@@ -1242,33 +1240,33 @@ end
         @test has_return_escape(result.state[Argument(3)], r) # b
         @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
     end
-    let result = code_escapes((String, String, Symbol)) do a, b, fld
-            obj = SafeRefs("a", "b")
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Symbol)) do a, b, fld
+            obj = SafeRefs(Ref("a"), Ref("b"))
             setfield!(obj, fld, a)
             return obj[2] # should escape `a`
         end
-        i = only(findall(isnew, result.ir.stmts.stmt))
+        i = last(findall(isnew, result.ir.stmts.stmt))
         r = only(findall(isreturn, result.ir.stmts.stmt))
         @test has_return_escape(result.state[Argument(2)], r) # a
         @test !has_return_escape(result.state[Argument(3)], r) # b
         @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
     end
-    let result = code_escapes((String, Symbol)) do a, fld
-            obj = SafeRefs("a", "b")
+    let result = code_escapes((Base.RefValue{String}, Symbol)) do a, fld
+            obj = SafeRefs(Ref("a"), Ref("b"))
             setfield!(obj, fld, a)
             return obj[1] # this should escape `a`
         end
-        i = only(findall(isnew, result.ir.stmts.stmt))
+        i = last(findall(isnew, result.ir.stmts.stmt))
         r = only(findall(isreturn, result.ir.stmts.stmt))
         @test has_return_escape(result.state[Argument(2)], r) # a
         @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
     end
-    let result = code_escapes((String, String, Int)) do a, b, idx
-            obj = SafeRefs("a", "b")
+    let result = code_escapes((Base.RefValue{String}, Base.RefValue{String}, Int)) do a, b, idx
+        obj = SafeRefs(Ref("a"), Ref("b"))
             obj[idx] = a
             return obj[2] # should escape `a`
         end
-        i = only(findall(isnew, result.ir.stmts.stmt))
+        i = last(findall(isnew, result.ir.stmts.stmt))
         r = only(findall(isreturn, result.ir.stmts.stmt))
         @test has_return_escape(result.state[Argument(2)], r) # a
         @test !has_return_escape(result.state[Argument(3)], r) # b
@@ -1280,7 +1278,7 @@ end
 
     let result = @eval EATModule() begin
             @noinline getx(obj) = obj[]
-            $code_escapes((String,)) do a
+            $code_escapes((Base.RefValue{String},)) do a
                 obj = SafeRef(a)
                 fld = getx(obj)
                 return fld
@@ -1294,8 +1292,8 @@ end
     end
 
     # TODO interprocedural alias analysis
-    let result = code_escapes((SafeRef{String},)) do s
-            s[] = "bar"
+    let result = code_escapes((SafeRef{Base.RefValue{String}},)) do s
+            s[] = Ref("bar")
             global GV = s[]
             nothing
         end
@@ -1335,7 +1333,7 @@ end
     let result = @eval EATModule() begin
             @noinline mysetindex!(x, a) = x[1] = a
             const Ax = Vector{Any}(undef, 1)
-            $code_escapes((String,)) do s
+            $code_escapes((Base.RefValue{String},)) do s
                 mysetindex!(Ax, s)
             end
         end
@@ -1391,11 +1389,11 @@ end
     end
 
     # handle conflicting field information correctly
-    let result = code_escapes((Bool,String,String,)) do cnd, baz, qux
+    let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},)) do cnd, baz, qux
             if cnd
-                o = SafeRef("foo")
+                o = SafeRef(Ref("foo"))
             else
-                o = SafeRefs("bar", baz)
+                o = SafeRefs(Ref("bar"), baz)
                 r = getfield(o, 2)
             end
             if cnd
@@ -1409,12 +1407,14 @@ end
         @test has_return_escape(result.state[Argument(3)], r) # baz
         @test has_return_escape(result.state[Argument(4)], r) # qux
         for new in findall(isnew, result.ir.stmts.stmt)
-            @test is_load_forwardable(result.state[SSAValue(new)])
+            if !(result.ir[SSAValue(new)][:type] <: Base.RefValue)
+                @test is_load_forwardable(result.state[SSAValue(new)])
+            end
         end
     end
-    let result = code_escapes((Bool,String,String,)) do cnd, baz, qux
+    let result = code_escapes((Bool,Base.RefValue{String},Base.RefValue{String},)) do cnd, baz, qux
             if cnd
-                o = SafeRefs("foo", "bar")
+                o = SafeRefs(Ref("foo"), Ref("bar"))
                 r = setfield!(o, 2, baz)
             else
                 o = SafeRef(qux)
@@ -2139,21 +2139,13 @@ end
 # ========================
 
 # propagate escapes imposed on call arguments
-@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing)
-let result = code_escapes() do
-        broadcast_noescape1(Ref("Hi"))
-    end
-    i = only(findall(isnew, result.ir.stmts.stmt))
-    @test !has_return_escape(result.state[SSAValue(i)])
-    @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # TODO `getfield(RefValue{String}, :x)` isn't safe
-end
 @noinline broadcast_noescape2(b) = broadcast(identity, b)
 let result = code_escapes() do
-        broadcast_noescape2(Ref("Hi"))
+        broadcast_noescape2(Ref(Ref("Hi")))
     end
-    i = only(findall(isnew, result.ir.stmts.stmt))
+    i = last(findall(isnew, result.ir.stmts.stmt))
     @test_broken !has_return_escape(result.state[SSAValue(i)]) # TODO interprocedural alias analysis
-    @test_broken !has_thrown_escape(result.state[SSAValue(i)]) # TODO `getfield(RefValue{String}, :x)` isn't safe
+    @test !has_thrown_escape(result.state[SSAValue(i)])
 end
 @noinline allescape_argument(a) = (global GV = a) # obvious escape
 let result = code_escapes() do
@@ -2248,13 +2240,13 @@ end
 # accounts for ThrownEscape via potential MethodError
 
 # no method error
-@noinline identity_if_string(x::SafeRef) = (println("preventing inlining"); nothing)
+@noinline identity_if_string(x::SafeRef{<:AbstractString}) = (println("preventing inlining"); nothing)
 let result = code_escapes((SafeRef{String},)) do x
         identity_if_string(x)
     end
     @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
 end
-let result = code_escapes((Union{SafeRef{String},Nothing},)) do x
+let result = code_escapes((SafeRef,)) do x
         identity_if_string(x)
     end
     i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.stmt))
@@ -2307,4 +2299,21 @@ let result = code_escapes((SafeRef{String},Any)) do x, y
     @test has_all_escape(result.state[Argument(3)])  # y
 end
 
+@eval function scope_folding()
+    $(Expr(:tryfinally,
+        Expr(:block,
+            Expr(:tryfinally, :(), :(), 2),
+            :(return Core.current_scope())),
+    :(), 1))
+end
+@eval function scope_folding_opt()
+    $(Expr(:tryfinally,
+        Expr(:block,
+            Expr(:tryfinally, :(), :(), :(Base.inferencebarrier(2))),
+            :(return Core.current_scope())),
+    :(), :(Base.inferencebarrier(1))))
+end
+@test (@code_escapes scope_folding()) isa EAUtils.EscapeResult
+@test (@code_escapes scope_folding_opt()) isa EAUtils.EscapeResult
+
 end # module test_EA
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index 529ee7b611448..26ae965b35319 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -222,18 +222,18 @@ if opt_level > 0
     @test occursin("call i32 @memcmp(", compare_large_struct_ir) || occursin("call i32 @bcmp(", compare_large_struct_ir)
     @test !occursin("%gcframe", compare_large_struct_ir)
 
-    @test occursin("jl_gc_pool_alloc", get_llvm(MutableStruct, Tuple{}))
+    @test occursin("jl_gc_small_alloc", get_llvm(MutableStruct, Tuple{}))
     breakpoint_mutable_ir = get_llvm(breakpoint_mutable, Tuple{MutableStruct})
     @test !occursin("%gcframe", breakpoint_mutable_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_mutable_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_mutable_ir)
 
     breakpoint_badref_ir = get_llvm(breakpoint_badref, Tuple{MutableStruct})
     @test !occursin("%gcframe", breakpoint_badref_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_badref_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_badref_ir)
 
     breakpoint_ptrstruct_ir = get_llvm(breakpoint_ptrstruct, Tuple{RealStruct})
     @test !occursin("%gcframe", breakpoint_ptrstruct_ir)
-    @test !occursin("jl_gc_pool_alloc", breakpoint_ptrstruct_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_ptrstruct_ir)
 end
 
 function two_breakpoint(a::Float64)
@@ -251,17 +251,17 @@ end
 if opt_level > 0
     breakpoint_f64_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Float64},), a),
                                  Tuple{Float64})
-    @test !occursin("jl_gc_pool_alloc", breakpoint_f64_ir)
+    @test !occursin("jl_gc_small_alloc", breakpoint_f64_ir)
     breakpoint_any_ir = get_llvm((a)->ccall(:jl_breakpoint, Cvoid, (Ref{Any},), a),
                                  Tuple{Float64})
-    @test occursin("jl_gc_pool_alloc", breakpoint_any_ir)
+    @test occursin("jl_gc_small_alloc", breakpoint_any_ir)
     two_breakpoint_ir = get_llvm(two_breakpoint, Tuple{Float64})
-    @test !occursin("jl_gc_pool_alloc", two_breakpoint_ir)
+    @test !occursin("jl_gc_small_alloc", two_breakpoint_ir)
     @test occursin("llvm.lifetime.end", two_breakpoint_ir)
 
     @test load_dummy_ref(1234) === 1234
     load_dummy_ref_ir = get_llvm(load_dummy_ref, Tuple{Int})
-    @test !occursin("jl_gc_pool_alloc", load_dummy_ref_ir)
+    @test !occursin("jl_gc_small_alloc", load_dummy_ref_ir)
     # Hopefully this is reliable enough. LLVM should be able to optimize this to a direct return.
     @test occursin("ret $Iptr %\"x::$(Int)\"", load_dummy_ref_ir)
 end
@@ -440,7 +440,7 @@ function f1_30093(r)
     end
 end
 
-@test f1_30093(Ref(0)) == nothing
+@test f1_30093(Ref(0)) === nothing
 
 # issue 33590
 function f33590(b, x)
@@ -501,10 +501,9 @@ function f37262(x)
     end
 end
 @testset "#37262" begin
-    str = "store volatile { i8, {}*, {}*, {}*, {}* } zeroinitializer, { i8, {}*, {}*, {}*, {}* }* %phic"
-    str_opaque = "store volatile { i8, ptr, ptr, ptr, ptr } zeroinitializer, ptr %phic"
+    str_opaque = "getelementptr inbounds i8, ptr %.roots.phic, i32 8\n  store volatile ptr null"
     llvmstr = get_llvm(f37262, (Bool,), false, false, false)
-    @test (contains(llvmstr, str) || contains(llvmstr, str_opaque)) || llvmstr
+    @test contains(llvmstr, str_opaque)
     @test f37262(Base.inferencebarrier(true)) === nothing
 end
 
@@ -697,7 +696,7 @@ mktempdir() do pfx
         libs_deleted += 1
     end
     @test libs_deleted > 0
-    @test readchomp(`$pfx/bin/$(Base.julia_exename()) -e 'print("no codegen!\n")'`) == "no codegen!"
+    @test readchomp(`$pfx/bin/$(Base.julia_exename()) --startup-file=no -e 'print("no codegen!\n")'`) == "no codegen!"
 
     # PR #47343
     libs_emptied = 0
@@ -956,3 +955,51 @@ function foonopreds()
     pkgid.uuid !== nothing ? pkgid.uuid : false
 end
 @test foonopreds() !== nothing
+
+# issue 55396
+struct Incomplete55396
+  x::Tuple{Int}
+  y::Int
+  @noinline Incomplete55396(x::Int) = new((x,))
+end
+let x = Incomplete55396(55396)
+    @test x.x === (55396,)
+end
+
+# Core.getptls() special handling
+@test !occursin("call ptr @jlplt", get_llvm(Core.getptls, Tuple{})) #It should lower to a direct load of the ptls and not a ccall
+
+# issue 55208
+@noinline function f55208(x, i)
+    z = (i == 0 ? x[1] : x[i])
+    return z isa Core.TypeofBottom
+end
+@test f55208((Union{}, 5, 6, 7), 0)
+
+@noinline function g55208(x, i)
+    z = (i == 0 ? x[1] : x[i])
+    typeof(z)
+end
+@test g55208((Union{}, true, true), 0) === typeof(Union{})
+
+@test string((Core.Union{}, true, true, true)) == "(Union{}, true, true, true)"
+
+# Issue #55558
+for (T, StructName) in ((Int128, :Issue55558), (UInt128, :UIssue55558))
+    @eval begin
+        struct $(StructName)
+            a::$(T)
+            b::Int64
+            c::$(T)
+        end
+        local broken_i128 = Base.BinaryPlatforms.arch(Base.BinaryPlatforms.HostPlatform()) == "powerpc64le"
+        @test fieldoffset($(StructName), 2) == 16
+        @test fieldoffset($(StructName), 3) == 32 broken=broken_i128
+        @test sizeof($(StructName)) == 48 broken=broken_i128
+    end
+end
+
+@noinline Base.@nospecializeinfer f55768(@nospecialize z::UnionAll) = z === Vector
+@test f55768(Vector)
+@test f55768(Vector{T} where T)
+@test !f55768(Vector{S} where S)
diff --git a/test/compiler/effects.jl b/test/compiler/effects.jl
index a27d52d68b9a9..c8a699b294d37 100644
--- a/test/compiler/effects.jl
+++ b/test/compiler/effects.jl
@@ -810,7 +810,12 @@ end
 #        @test !Core.Compiler.is_nothrow(effects)
 #    end
 #end
-#
+
+@test Core.Compiler.is_noub(Base.infer_effects(Base._growbeg!, (Vector{Int}, Int)))
+@test Core.Compiler.is_noub(Base.infer_effects(Base._growbeg!, (Vector{Any}, Int)))
+@test Core.Compiler.is_noub(Base.infer_effects(Base._growend!, (Vector{Int}, Int)))
+@test Core.Compiler.is_noub(Base.infer_effects(Base._growend!, (Vector{Any}, Int)))
+
 # tuple indexing
 # --------------
 
@@ -1141,6 +1146,14 @@ end
 @test_broken Core.Compiler.is_effect_free(Base.infer_effects(set_arr_with_unused_arg_2, (Vector{Int},)))
 @test_broken Core.Compiler.is_effect_free_if_inaccessiblememonly(Base.infer_effects(set_arg_arr!, (Vector{Int},)))
 
+# EA-based refinement of :effect_free
+function f_EA_refine(ax, b)
+    bx = Ref{Any}()
+    @noinline bx[] = b
+    return ax[] + b
+end
+@test Core.Compiler.is_effect_free(Base.infer_effects(f_EA_refine, (Base.RefValue{Int},Int)))
+
 function issue51837(; openquotechar::Char, newlinechar::Char)
     ncodeunits(openquotechar) == 1 || throw(ArgumentError("`openquotechar` must be a single-byte character"))
     if !isnothing(newlinechar)
@@ -1361,3 +1374,8 @@ end |> Core.Compiler.is_nothrow
 @test Base.infer_effects((Vector{Any},)) do xs
     Core.svec(xs...)
 end |> Core.Compiler.is_nothrow
+
+# effects for unknown `:foreigncall`s
+@test Base.infer_effects() do
+    @ccall unsafecall()::Cvoid
+end == Core.Compiler.EFFECTS_UNKNOWN
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 8b0c6669ffb89..7c7726413004a 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -1065,7 +1065,7 @@ gl_17003 = [1, 2, 3]
 f2_17003(item::AVector_17003) = nothing
 f2_17003(::Any) = f2_17003(NArray_17003(gl_17003))
 
-@test f2_17003(1) == nothing
+@test f2_17003(1) === nothing
 
 # issue #20847
 function segfaultfunction_20847(A::Vector{NTuple{N, T}}) where {N, T}
@@ -1076,7 +1076,7 @@ end
 tuplevec_20847 = Tuple{Float64, Float64}[(0.0,0.0), (1.0,0.0)]
 
 for A in (1,)
-    @test segfaultfunction_20847(tuplevec_20847) == nothing
+    @test segfaultfunction_20847(tuplevec_20847) === nothing
 end
 
 # Issue #20902, check that this doesn't error.
@@ -1538,7 +1538,7 @@ let nfields_tfunc(@nospecialize xs...) =
     @test sizeof_nothrow(String)
     @test !sizeof_nothrow(Type{String})
     @test sizeof_tfunc(Type{Union{Int64, Int32}}) == Const(Core.sizeof(Union{Int64, Int32}))
-    let PT = Core.Compiler.PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64])
+    let PT = Core.PartialStruct(Tuple{Int64,UInt64}, Any[Const(10), UInt64])
         @test sizeof_tfunc(PT) === Const(16)
         @test nfields_tfunc(PT) === Const(2)
         @test sizeof_nothrow(PT)
@@ -2151,78 +2151,75 @@ end
 
 @testset "branching on conditional object" begin
     # simple
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         b = a === nothing
         return b ? 0 : a # ::Int
-    end == Any[Int]
+    end == Int
 
     # can use multiple times (as far as the subject of condition hasn't changed)
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         b = a === nothing
         c = b ? 0 : a # c::Int
         d = !b ? a : 0 # d::Int
         return c, d # ::Tuple{Int,Int}
-    end == Any[Tuple{Int,Int}]
+    end == Tuple{Int,Int}
 
     # should invalidate old constraint when the subject of condition has changed
-    @test Base.return_types((Union{Nothing,Int},)) do a
+    @test Base.infer_return_type((Union{Nothing,Int},)) do a
         cond = a === nothing
         r1 = cond ? 0 : a # r1::Int
         a = 0
         r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int}
         return r1, r2 # ::Tuple{Int,Int}
-    end == Any[Tuple{Int,Int}]
+    end == Tuple{Int,Int}
 end
 
 # https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851
 # `PartialStruct` shouldn't wrap `Conditional`
-let M = Module()
-    @eval M begin
-        struct BePartialStruct
-            val::Int
-            cond
-        end
-    end
-
-    rt = @eval M begin
-        Base.return_types((Union{Nothing,Int},)) do a
-            cond = a === nothing
-            obj = $(Expr(:new, M.BePartialStruct, 42, :cond))
-            r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
-            a = $(gensym(:anyvar))::Any
-            r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
-            return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
-        end |> only
-    end
-    @test rt == Tuple{Union{Nothing,Int},Any}
+struct BePartialStruct
+    val::Int
+    cond
+end
+@test Tuple{Union{Nothing,Int},Any} == @eval Base.infer_return_type((Union{Nothing,Int},)) do a
+    cond = a === nothing
+    obj = $(Expr(:new, BePartialStruct, 42, :cond))
+    r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
+    a = $(gensym(:anyvar))::Any
+    r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
+    return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
 end
 
 # make sure we never form nested `Conditional` (https://github.com/JuliaLang/julia/issues/46207)
-@test Base.return_types((Any,)) do a
+@test Base.infer_return_type((Any,)) do a
     c = isa(a, Integer)
     42 === c ? :a : "b"
-end |> only === String
-@test Base.return_types((Any,)) do a
+end == String
+@test Base.infer_return_type((Any,)) do a
     c = isa(a, Integer)
     c === 42 ? :a : "b"
-end |> only === String
-
-@testset "conditional constraint propagation from non-`Conditional` object" begin
-    @test Base.return_types((Bool,)) do b
-        if b
-            return !b ? nothing : 1 # ::Int
-        else
-            return 0
-        end
-    end == Any[Int]
+end == String
 
-    @test Base.return_types((Any,)) do b
-        if b
-            return b # ::Bool
-        else
-            return nothing
-        end
-    end == Any[Union{Bool,Nothing}]
+function condition_object_update1(cond)
+    if cond # `cond` is known to be `Const(true)` within this branch
+        return !cond ? nothing : 1 # ::Int
+    else
+        return  cond ? nothing : 1 # ::Int
+    end
+end
+function condition_object_update2(x)
+    cond = x isa Int
+    if cond # `cond` is known to be `Const(true)` within this branch
+        return !cond ? nothing : x # ::Int
+    else
+        return  cond ? nothing : 1 # ::Int
+    end
+end
+@testset "state update for condition object" begin
+    # refine the type of condition object into constant boolean values on branching
+    @test Base.infer_return_type(condition_object_update1, (Bool,)) == Int
+    @test Base.infer_return_type(condition_object_update1, (Any,)) == Int
+    # refine even when their original type is `Conditional`
+    @test Base.infer_return_type(condition_object_update2, (Any,)) == Int
 end
 
 @testset "`from_interprocedural!`: translate inter-procedural information" begin
@@ -3890,113 +3887,6 @@ f_apply_cglobal(args...) = cglobal(args...)
 f37532(T, x) = (Core.bitcast(Ptr{T}, x); x)
 @test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int]
 
-# PR #37749
-# Helper functions for Core.Compiler.Timings. These are normally accessed via a package -
-# usually (SnoopCompileCore).
-function time_inference(f)
-    Core.Compiler.Timings.reset_timings()
-    Core.Compiler.__set_measure_typeinf(true)
-    f()
-    Core.Compiler.__set_measure_typeinf(false)
-    Core.Compiler.Timings.close_current_timer()
-    return Core.Compiler.Timings._timings[1]
-end
-function depth(t::Core.Compiler.Timings.Timing)
-    maximum(depth.(t.children), init=0) + 1
-end
-function flatten_times(t::Core.Compiler.Timings.Timing)
-    collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...]))
-end
-# Some very limited testing of timing the type inference (#37749).
-@testset "Core.Compiler.Timings" begin
-    # Functions that call each other
-    @eval module M1
-        i(x) = x+5
-        i2(x) = x+2
-        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
-        g(y::Integer, x) = h(Any[y]) + Int(x)
-    end
-    timing1 = time_inference() do
-        @eval M1.g(2, 3.0)
-    end
-    @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1))
-    # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
-    # their concrete types.
-    @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
-    @test all(child->isa(child.bt, Vector), timing1.children)
-    @test all(child->child.bt===nothing, timing1.children[1].children)
-    # Test the stacktrace
-    @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame})
-    # Test that inference has cached some of the Method Instances
-    timing2 = time_inference() do
-        @eval M1.g(2, 3.0)
-    end
-    @test length(flatten_times(timing2)) < length(flatten_times(timing1))
-    # Printing of InferenceFrameInfo for mi.def isa Module
-    @eval module M2
-        i(x) = x+5
-        i2(x) = x+2
-        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
-        g(y::Integer, x) = h(Any[y]) + Int(x)
-    end
-    # BEGIN LINE NUMBER SENSITIVITY (adjust the line offset below as needed)
-    timingmod = time_inference() do
-        @eval @testset "Outer" begin
-            @testset "Inner" begin
-                for i = 1:2 M2.g(2, 3.0) end
-            end
-        end
-    end
-    @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 6)", string(timingmod.children))
-    # END LINE NUMBER SENSITIVITY
-
-    # Recursive function
-    @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end
-    timing = time_inference() do
-        @eval _Recursive.f(Base.inferencebarrier(5))
-    end
-    @test 2 <= depth(timing) <= 3  # root -> f (-> +)
-    @test 2 <= length(flatten_times(timing)) <= 3  # root, f, +
-
-    # Functions inferred with multiple constants
-    @eval module C
-        i(x) = x === 0 ? 0 : 1 / x
-        a(x) = i(0) * i(x)
-        b() = i(0) * i(1) * i(0)
-        function loopc(n)
-            s = 0
-            for i = 1:n
-                s += i
-            end
-            return s
-        end
-        call_loopc() = loopc(5)
-        myfloor(::Type{T}, x) where T = floor(T, x)
-        d(x) = myfloor(Int16, x)
-    end
-    timing = time_inference() do
-        @eval C.a(2)
-        @eval C.b()
-        @eval C.call_loopc()
-        @eval C.d(3.2)
-    end
-    ft = flatten_times(timing)
-    @test !isempty(ft)
-    str = sprint(show, ft)
-    @test occursin("InferenceFrameInfo for /(1::$Int, ::$Int)", str)  # inference constants
-    @test occursin("InferenceFrameInfo for Core.Compiler.Timings.ROOT()", str) # qualified
-    # loopc has internal slots, check constant printing in this case
-    sel = filter(ti -> ti.second.mi.def.name === :loopc, ft)
-    ifi = sel[end].second
-    @test length(ifi.slottypes) > ifi.nargs
-    str = sprint(show, sel)
-    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.loopc(5::$Int)", str)
-    # check that types aren't double-printed as `T::Type{T}`
-    sel = filter(ti -> ti.second.mi.def.name === :myfloor, ft)
-    str = sprint(show, sel)
-    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.myfloor(::Type{Int16}, ::Float64)", str)
-end
-
 # issue #37638
 @test only(Base.return_types(() -> (nothing, Any[]...)[2])) isa Type
 
@@ -4203,6 +4093,110 @@ end
     end
 end == [Union{Some{Float64}, Some{Int}, Some{UInt8}}]
 
+@testset "constraint back-propagation from typeassert" begin
+    @test Base.infer_return_type((Any,)) do a
+        typeassert(a, Int)
+        return a
+    end == Int
+
+    @test Base.infer_return_type((Any,Bool)) do a, b
+        if b
+            typeassert(a, Int64)
+        else
+            typeassert(a, Int32)
+        end
+        return a
+    end == Union{Int32,Int64}
+end
+
+callsig_backprop_basic(::Int) = nothing
+callsig_backprop_unionsplit(::Int32) = nothing
+callsig_backprop_unionsplit(::Int64) = nothing
+callsig_backprop_multi(::Int32, ::Int64) = nothing
+callsig_backprop_any(::Any) = nothing
+callsig_backprop_lhs(::Int) = nothing
+callsig_backprop_bailout(::Val{0}) = 0
+callsig_backprop_bailout(::Val{1}) = undefvar # undefvar::Any triggers `bail_out_call`
+callsig_backprop_bailout(::Val{2}) = 2
+callsig_backprop_addinteger(a::Integer, b::Integer) = a + b # results in too many matching methods and triggers `bail_out_call`)
+@test Base.infer_return_type(callsig_backprop_addinteger) == Any
+let effects = Base.infer_effects(callsig_backprop_addinteger)
+    @test !Core.Compiler.is_consistent(effects)
+    @test !Core.Compiler.is_effect_free(effects)
+    @test !Core.Compiler.is_nothrow(effects)
+    @test !Core.Compiler.is_terminates(effects)
+end
+callsig_backprop_anti(::Any) = :any
+callsig_backprop_anti(::Int) = :int
+
+@testset "constraint back-propagation from call signature" begin
+    # basic case
+    @test Base.infer_return_type(a->(callsig_backprop_basic(a); return a), (Any,)) == Int
+
+    # union-split case
+    @test Base.infer_return_type(a->(callsig_backprop_unionsplit(a); return a), (Any,)) == Union{Int32,Int64}
+
+    # multiple arguments updates
+    @test Base.infer_return_type((Any,Any)) do a, b
+        callsig_backprop_multi(a, b)
+        return a, b
+    end == Tuple{Int32,Int64}
+
+    # refinement should happen only when it's worthwhile
+    @test Base.infer_return_type(a->(callsig_backprop_any(a); return a), (Integer,)) == Integer
+
+    # state update on lhs slot (assignment effect should have the precedence)
+    @test Base.infer_return_type((Any,)) do a
+        a = callsig_backprop_lhs(a)
+        return a
+    end == Nothing
+
+    # make sure to throw away an intermediate refinement information when we bail out early
+    # (inference would bail out on `callsig_backprop_bailout(::Val{1})`)
+    @test Base.infer_return_type(a->(callsig_backprop_bailout(a); return a), (Any,)) == Any
+
+    # if we see all the matching methods, we don't need to throw away refinement information
+    # even if it's caught by `bail_out_call` check
+    @test Base.infer_return_type((Any,Any)) do a, b
+        callsig_backprop_addinteger(a, b)
+        return a, b
+    end == Tuple{Integer,Integer}
+
+    # anti case
+    @test Base.infer_return_type((Any,)) do x
+        callsig_backprop_anti(x)
+        return x
+    end == Any
+end
+
+# make sure to add backedges when we use call signature constraint
+function callsig_backprop_invalidation_outer(a)
+    callsig_backprop_invalidation_inner!(a)
+    return a
+end
+@eval callsig_backprop_invalidation_inner!(::Int) = $(gensym(:undefvar)) # ::Any
+@test Base.infer_return_type((Any,)) do a
+    callsig_backprop_invalidation_outer(a)
+end == Int
+# new definition of `callsig_backprop_invalidation_inner!` should invalidate `callsig_backprop_invalidation_outer`
+# (even if the previous return type is annotated as `Any`)
+@eval callsig_backprop_invalidation_inner!(::Nothing) = $(gensym(:undefvar)) # ::Any
+@test Base.infer_return_type((Any,)) do a
+    # since inference will bail out at the first matched `_inner!` and so call signature constraint won't be available
+    callsig_backprop_invalidation_outer(a)
+end ≠ Int
+
+# https://github.com/JuliaLang/julia/issues/37866
+function issue37866(v::Vector{Union{Nothing,Float64}})
+    for x in v
+        if x > 5.0
+            return x # x > 5.0 is MethodError for Nothing so can assume ::Float64
+        end
+    end
+    return 0.0
+end
+@test Base.infer_return_type(issue37866, (Vector{Union{Nothing,Float64}},)) == Float64
+
 # make sure inference on a recursive call graph with nested `Type`s terminates
 # https://github.com/JuliaLang/julia/issues/40336
 f40336(@nospecialize(t)) = f40336(Type{t})
@@ -4642,32 +4636,80 @@ end
 
 # issue #43784
 @testset "issue #43784" begin
-    init = Base.ImmutableDict{Any,Any}()
-    a = Const(init)
-    b = Core.PartialStruct(typeof(init), Any[Const(init), Any, Any])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c)
-    @test ⊑(b, c)
-
-    init = Base.ImmutableDict{Number,Number}()
-    a = Const(init)
-    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c) && ⊑(b, c)
-    @test c === typeof(init)
-
-    a = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c) && ⊑(b, c)
-    @test c.fields[2] === Any # or Number
-    @test c.fields[3] === ComplexF64
-
-    b = Core.Compiler.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
-    c = Core.Compiler.tmerge(a, b)
-    @test ⊑(a, c)
-    @test ⊑(b, c)
-    @test c.fields[2] === Complex
-    @test c.fields[3] === Complex
+    ⊑ = Core.Compiler.partialorder(Core.Compiler.fallback_lattice)
+    ⊔ = Core.Compiler.join(Core.Compiler.fallback_lattice)
+    Const, PartialStruct = Core.Const, Core.PartialStruct
+
+    let init = Base.ImmutableDict{Any,Any}()
+        a = Const(init)
+        b = PartialStruct(typeof(init), Any[Const(init), Any, Any])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === typeof(init)
+    end
+    let init = Base.ImmutableDict{Any,Any}(1,2)
+        a = Const(init)
+        b = PartialStruct(typeof(init), Any[Const(getfield(init,1)), Any, Any])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test length(c.fields) == 3
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = Const(init)
+        b = PartialStruct(typeof(init), Any[Const(init), Number, ComplexF64])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === typeof(init)
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
+        b = PartialStruct(typeof(init), Any[Const(init), Number, ComplexF64])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test c.fields[2] === Number
+        @test c.fields[3] === ComplexF64
+    end
+    let init = Base.ImmutableDict{Number,Number}()
+        a = PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
+        b = PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test c.fields[2] === Complex
+        @test c.fields[3] === Complex
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = PartialStruct(T, Any[T])
+        b = PartialStruct(T, Any[T, Number, Number])
+        @test b ⊑ a
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c isa PartialStruct
+        @test length(c.fields) == 1
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = PartialStruct(T, Any[T])
+        b = Const(T())
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = Const(T())
+        b = PartialStruct(T, Any[T])
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
+    let T = Base.ImmutableDict{Number,Number}
+        a = Const(T())
+        b = Const(T(1,2))
+        c = a ⊔ b
+        @test a ⊑ c && b ⊑ c
+        @test c === T
+    end
 
     global const ginit43784 = Base.ImmutableDict{Any,Any}()
     @test Base.return_types() do
@@ -4701,6 +4743,31 @@ end
     @test a == Tuple
 end
 
+let ⊑ = Core.Compiler.partialorder(Core.Compiler.fallback_lattice)
+    ⊔ = Core.Compiler.join(Core.Compiler.fallback_lattice)
+    Const, PartialStruct = Core.Const, Core.PartialStruct
+
+    @test  (Const((1,2)) ⊑ PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]))
+    @test !(Const((1,2)) ⊑ PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]))
+    @test !(Const((1,2,3)) ⊑ PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]))
+    @test  (Const((1,2,3)) ⊑ PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]))
+    @test  (Const((1,2)) ⊑ PartialStruct(Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}]))
+    @test  (Const((1,2)) ⊑ PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}])) broken=true
+    @test  (Const((1,2,3)) ⊑ PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]))
+    @test !(PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int}, Any[Const(1),Int]) ⊑ Const((1,2,3)))
+    @test !(PartialStruct(Tuple{Int,Int,Int}, Any[Const(1),Int,Int]) ⊑ Const((1,2,3)))
+    @test !(PartialStruct(Tuple{Int,Vararg{Int}}, Any[Const(1),Vararg{Int}]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ Const((1,2)))
+    @test !(PartialStruct(Tuple{Int,Int,Vararg{Int}}, Any[Const(1),Int,Vararg{Int}]) ⊑ Const((1,2,3)))
+
+    t = Const((false, false)) ⊔ Const((false, true))
+    @test t isa PartialStruct && length(t.fields) == 2 && t.fields[1] === Const(false)
+    t = t ⊔ Const((false, false, 0))
+    @test t ⊑ Union{Tuple{Bool,Bool},Tuple{Bool,Bool,Int}}
+end
+
 # Test that a function-wise `@max_methods` works as expected
 Base.Experimental.@max_methods 1 function f_max_methods end
 f_max_methods(x::Int) = 1
@@ -4951,13 +5018,13 @@ g() = empty_nt_values(Base.inferencebarrier(Tuple{}))
 # to terminate the call.
 @newinterp RecurseInterpreter
 let CC = Core.Compiler
-    function CC.const_prop_entry_heuristic(interp::RecurseInterpreter, result::CC.MethodCallResult,
-                                           si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
+    function CC.const_prop_rettype_heuristic(interp::RecurseInterpreter, result::CC.MethodCallResult,
+                                             si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
         if result.rt isa CC.LimitedAccuracy
             return force # allow forced constprop to recurse into unresolved cycles
         end
-        return @invoke CC.const_prop_entry_heuristic(interp::CC.AbstractInterpreter, result::CC.MethodCallResult,
-                                                     si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
+        return @invoke CC.const_prop_rettype_heuristic(interp::CC.AbstractInterpreter, result::CC.MethodCallResult,
+                                                       si::CC.StmtInfo, sv::CC.AbsIntState, force::Bool)
     end
 end
 Base.@constprop :aggressive type_level_recurse1(x...) = x[1] == 2 ? 1 : (length(x) > 100 ? x : type_level_recurse2(x[1] + 1, x..., x...))
@@ -5765,3 +5832,219 @@ end
 bar54341(args...) = foo54341(4, args...)
 
 @test Core.Compiler.return_type(bar54341, Tuple{Vararg{Int}}) === Int
+
+# `PartialStruct` for partially initialized structs:
+struct PartiallyInitialized1
+    a; b; c
+    PartiallyInitialized1(a) = (@nospecialize; new(a))
+    PartiallyInitialized1(a, b) = (@nospecialize; new(a, b))
+    PartiallyInitialized1(a, b, c) = (@nospecialize; new(a, b, c))
+end
+mutable struct PartiallyInitialized2
+    a; b; c
+    PartiallyInitialized2(a) = (@nospecialize; new(a))
+    PartiallyInitialized2(a, b) = (@nospecialize; new(a, b))
+    PartiallyInitialized2(a, b, c) = (@nospecialize; new(a, b, c))
+end
+
+# 1. isdefined modeling for partial struct
+@test Base.infer_return_type((Any,Any)) do a, b
+    Val(isdefined(PartiallyInitialized1(a, b), :b))
+end == Val{true}
+@test Base.infer_return_type((Any,Any,)) do a, b
+    Val(isdefined(PartiallyInitialized1(a, b), :c))
+end >: Val{false}
+@test Base.infer_return_type((PartiallyInitialized1,)) do x
+    @assert isdefined(x, :a)
+    return Val(isdefined(x, :c))
+end == Val
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    Val(isdefined(PartiallyInitialized1(a, b, c), :c))
+end == Val{true}
+@test Base.infer_return_type((Any,Any)) do a, b
+    Val(isdefined(PartiallyInitialized2(a, b), :b))
+end == Val{true}
+@test Base.infer_return_type((Any,Any,)) do a, b
+    Val(isdefined(PartiallyInitialized2(a, b), :c))
+end >: Val{false}
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    s = PartiallyInitialized2(a, b)
+    s.c = c
+    Val(isdefined(s, :c))
+end >: Val{true}
+@test Base.infer_return_type((Any,Any,Any)) do a, b, c
+    Val(isdefined(PartiallyInitialized2(a, b, c), :c))
+end == Val{true}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    Val(isdefined(tuple(1, xs...), 1))
+end == Val{true}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    Val(isdefined(tuple(1, xs...), 2))
+end == Val
+
+# 2. getfield modeling for partial struct
+@test Base.infer_effects((Any,Any); optimize=false) do a, b
+    getfield(PartiallyInitialized1(a, b), :b)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f
+    getfield(PartiallyInitialized1(a, b), f, #=boundscheck=#false)
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c
+    getfield(PartiallyInitialized1(a, b, c), :c)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f
+    getfield(PartiallyInitialized1(a, b, c), f, #=boundscheck=#false)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any); optimize=false) do a, b
+    getfield(PartiallyInitialized2(a, b), :b)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Symbol,); optimize=false) do a, b, f
+    getfield(PartiallyInitialized2(a, b), f, #=boundscheck=#false)
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any); optimize=false) do a, b, c
+    getfield(PartiallyInitialized2(a, b, c), :c)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any,Any,Symbol); optimize=false) do a, b, c, f
+    getfield(PartiallyInitialized2(a, b, c), f, #=boundscheck=#false)
+end |> Core.Compiler.is_nothrow
+
+# isdefined-Conditionals
+@test Base.infer_effects((Base.RefValue{Any},)) do x
+    if isdefined(x, :x)
+        return getfield(x, :x)
+    end
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Base.RefValue{Any},)) do x
+    if isassigned(x)
+        return x[]
+    end
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Any,Any); optimize=false) do a, c
+    x = PartiallyInitialized2(a)
+    x.c = c
+    if isdefined(x, :c)
+        return x.b
+    end
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((PartiallyInitialized2,); optimize=false) do x
+    if isdefined(x, :b)
+        if isdefined(x, :c)
+            return x.c
+        end
+        return x.b
+    end
+    return nothing
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Bool,Int,); optimize=false) do c, b
+    x = c ? PartiallyInitialized1(true) : PartiallyInitialized1(true, b)
+    if isdefined(x, :b)
+        return Val(x.a), x.b
+    end
+    return nothing
+end |> Core.Compiler.is_nothrow
+
+# refine `undef` information from `@isdefined` check
+function isdefined_nothrow(c, x)
+    local val
+    if c
+        val = x
+    end
+    if @isdefined val
+        return val
+    end
+    return zero(Int)
+end
+@test Core.Compiler.is_nothrow(Base.infer_effects(isdefined_nothrow, (Bool,Int)))
+@test !any(first(only(code_typed(isdefined_nothrow, (Bool,Int)))).code) do @nospecialize x
+    Meta.isexpr(x, :throw_undef_if_not)
+end
+
+# End to end test case for the partially initialized struct with `PartialStruct`
+@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing)
+@test fully_eliminated() do
+    broadcast_noescape1(Ref("x"))
+end
+
+# InterConditional rt with Vararg argtypes
+fcondvarargs(a, b, c, d) = isa(d, Int64)
+gcondvarargs(a, x...) = return fcondvarargs(a, x...) ? isa(a, Int64) : !isa(a, Int64)
+@test Core.Compiler.return_type(gcondvarargs, Tuple{Vararg{Any}}) === Bool
+
+# JuliaLang/julia#55627: argtypes check in `abstract_call_opaque_closure`
+issue55627_make_oc() = Base.Experimental.@opaque (x::Int) -> 2x
+@test Base.infer_return_type() do
+    f = issue55627_make_oc()
+    return f(1), f()
+end == Union{}
+@test Base.infer_return_type((Vector{Int},)) do xs
+    f = issue55627_make_oc()
+    return f(1), f(xs...)
+end == Tuple{Int,Int}
+@test Base.infer_exception_type() do
+    f = issue55627_make_oc()
+    return f(1), f()
+end >: MethodError
+@test Base.infer_exception_type() do
+    f = issue55627_make_oc()
+    return f(1), f('1')
+end >: TypeError
+
+# `exct` modeling for opaque closure
+oc_exct_1() = Base.Experimental.@opaque (x) -> x < 0 ? throw(x) : x
+@test Base.infer_exception_type((Int,)) do x
+    oc_exct_1()(x)
+end == Int
+oc_exct_2() = Base.Experimental.@opaque Tuple{Number}->Number (x) -> '1'
+@test Base.infer_exception_type((Int,)) do x
+    oc_exct_2()(x)
+end == TypeError
+
+# nothrow modeling for `invoke` calls
+f_invoke_nothrow(::Number) = :number
+f_invoke_nothrow(::Int) = :int
+@test Base.infer_effects((Int,)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> Core.Compiler.is_nothrow
+@test Base.infer_effects((Char,)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> !Core.Compiler.is_nothrow
+@test Base.infer_effects((Union{Nothing,Int},)) do x
+    @invoke f_invoke_nothrow(x::Number)
+end |> !Core.Compiler.is_nothrow
+
+# `exct` modeling for `invoke` calls
+f_invoke_exct(x::Number) = x < 0 ? throw(x) : x
+f_invoke_exct(x::Int) = x
+@test Base.infer_exception_type((Int,)) do x
+    @invoke f_invoke_exct(x::Number)
+end == Int
+@test Base.infer_exception_type() do
+    @invoke f_invoke_exct(42::Number)
+end == Union{}
+@test Base.infer_exception_type((Union{Nothing,Int},)) do x
+    @invoke f_invoke_exct(x::Number)
+end == Union{Int,TypeError}
+@test Base.infer_exception_type((Int,)) do x
+    invoke(f_invoke_exct, Number, x)
+end == TypeError
+@test Base.infer_exception_type((Char,)) do x
+    invoke(f_invoke_exct, Tuple{Number}, x)
+end == TypeError
+
+@test Base.infer_exception_type((Vector{Any},)) do args
+    Core.throw_methoderror(args...)
+end == Union{MethodError,ArgumentError}
+
+# Issue https://github.com/JuliaLang/julia/issues/55751
+
+abstract type AbstractGrid55751{T, N} <: AbstractArray{T, N} end
+struct Grid55751{T, N, AT} <: AbstractGrid55751{T, N}
+    axes::AT
+end
+
+t155751 = Union{AbstractArray{UInt8, 4}, Array{Float32, 4}, Grid55751{Float32, 3, _A} where _A}
+t255751 = Array{Float32, 3}
+@test Core.Compiler.tmerge_types_slow(t155751,t255751) == AbstractArray # shouldn't hang
+
+issue55882_nfields(x::Union{T,Nothing}) where T<:Number = nfields(x)
+@test Base.infer_return_type(issue55882_nfields) <: Int
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index a8b5fd66dcd0d..2de6d9950d4e4 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -876,7 +876,7 @@ let src = code_typed1((Any,)) do x
         abstract_unionsplit_fallback(x)
     end
     @test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2
-    @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 let src = code_typed1((Union{Type,Number},)) do x
         abstract_unionsplit_fallback(x)
@@ -912,7 +912,7 @@ let src = code_typed1((Any,)) do x
     @test count(iscall((src, typeof)), src.code) == 2
     @test count(isinvoke(:println), src.code) == 0
     @test count(iscall((src, println)), src.code) == 0
-    @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 let src = code_typed1((Union{Type,Number},)) do x
         abstract_unionsplit_fallback(false, x)
@@ -960,8 +960,8 @@ let # aggressive inlining of single, abstract method match
     end |> only |> first
     # both callsites should be inlined
     @test count(isinvoke(:has_free_typevars), src.code) == 2
-    # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
-    @test count(iscall((src,isGoodType)), src.code) == 1
+    # `isGoodType(y::Any)` isn't fully covered, so the fallback is a method error
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 
 @inline isGoodType2(cnd, @nospecialize x::Type) =
@@ -973,8 +973,8 @@ let # aggressive inlining of single, abstract method match (with constant-prop'e
     # both callsite should be inlined with constant-prop'ed result
     @test count(isinvoke(:isType), src.code) == 2
     @test count(isinvoke(:has_free_typevars), src.code) == 0
-    # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
-    @test count(iscall((src,isGoodType2)), src.code) == 1
+    # `isGoodType(y::Any)` isn't fully covered, thus a MethodError gets inserted
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 
 @noinline function checkBadType!(@nospecialize x::Type)
@@ -989,8 +989,8 @@ let # aggressive static dispatch of single, abstract method match
     end |> only |> first
     # both callsites should be resolved statically
     @test count(isinvoke(:checkBadType!), src.code) == 2
-    # `checkBadType!(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
-    @test count(iscall((src,checkBadType!)), src.code) == 1
+    # `checkBadType!(y::Any)` isn't fully covered, thus a MethodError gets inserted
+    @test count(iscall((src, Core.throw_methoderror)), src.code) == 1 # fallback method error
 end
 
 @testset "late_inline_special_case!" begin
@@ -1570,7 +1570,6 @@ let
     @test get_finalization_count() == 1000
 end
 
-
 function cfg_finalization7(io)
     for i = -999:1000
         o = DoAllocWithField(0)
@@ -1597,6 +1596,31 @@ let
     @test get_finalization_count() == 1000
 end
 
+# Load forwarding with `finalizer` elision
+let src = code_typed1((Int,)) do x
+        xs = finalizer(Ref(x)) do obj
+            @noinline
+            Base.@assume_effects :nothrow :notaskstate
+            Core.println("finalizing: ", obj[])
+        end
+        Base.@assume_effects :nothrow @noinline println("xs[] = ", @inline xs[])
+        return xs[]
+    end
+    @test count(iscall((src, getfield)), src.code) == 0
+end
+let src = code_typed1((Int,)) do x
+        xs = finalizer(Ref(x)) do obj
+            @noinline
+            Base.@assume_effects :nothrow :notaskstate
+            Core.println("finalizing: ", obj[])
+        end
+        Base.@assume_effects :nothrow @noinline println("xs[] = ", @inline xs[])
+        xs[] += 1
+        return xs[]
+    end
+    @test count(iscall((src, getfield)), src.code) == 0
+    @test count(iscall((src, setfield!)), src.code) == 1
+end
 
 # optimize `[push!|pushfirst!](::Vector{Any}, x...)`
 @testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!]
@@ -2004,7 +2028,7 @@ f48397(::Tuple{String,String}) = :ok
 let src = code_typed1((Union{Bool,Tuple{String,Any}},)) do x
         f48397(x)
     end
-    @test any(iscall((src, f48397)), src.code)
+    @test any(iscall((src, Core.throw_methoderror)), src.code) # fallback method error)
 end
 g48397::Union{Bool,Tuple{String,Any}} = ("48397", 48397)
 let res = @test_throws MethodError let
@@ -2175,11 +2199,6 @@ let src = code_typed1() do
     @test count(isinvoke(:iterate), src.code) == 0
 end
 
-# JuliaLang/julia#53062: proper `joint_effects` for call with empty method matches
-let ir = first(only(Base.code_ircode(setproperty!, (Base.RefValue{Int},Symbol,Base.RefValue{Int}))))
-    i = findfirst(iscall((ir, convert)), ir.stmts.stmt)::Int
-    @test iszero(ir.stmts.flag[i] & Core.Compiler.IR_FLAG_NOTHROW)
-end
 function issue53062(cond)
     x = Ref{Int}(0)
     if cond
@@ -2214,3 +2233,19 @@ let ir = Base.code_ircode((Issue52644,); optimize_until="Inlining") do t
     @test irfunc(Issue52644(Tuple{})) === :DataType
     @test_throws MethodError irfunc(Issue52644(Tuple{<:Integer}))
 end
+
+foo_split(x::Float64) = 1
+foo_split(x::Int) = 2
+bar_inline_error() = foo_split(nothing)
+bar_split_error() = foo_split(Core.compilerbarrier(:type,nothing))
+
+let src = code_typed1(bar_inline_error, Tuple{})
+    # Should inline method errors
+    @test count(iscall((src, foo_split)), src.code) == 0
+    @test count(iscall((src, Core.throw_methoderror)), src.code) > 0
+end
+let src = code_typed1(bar_split_error, Tuple{})
+    # Should inline method errors
+    @test count(iscall((src, foo_split)), src.code) == 0
+    @test count(iscall((src, Core.throw_methoderror)), src.code) > 0
+end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index 281317ac25bf8..740ac5f4958e4 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -576,7 +576,6 @@ let # lifting `isa` through Core.ifelse
     @test count(iscall((src, isa)), src.code) == 0
 end
 
-
 let # lifting `isdefined` through PhiNode
     src = code_typed1((Bool,Some{Int},)) do c, x
         y = c ? x : nothing
@@ -1035,8 +1034,7 @@ exc39508 = ErrorException("expected")
 end
 @test test39508() === exc39508
 
-let
-    # `typeassert` elimination after SROA
+let # `typeassert` elimination after SROA
     # NOTE we can remove this optimization once inference is able to reason about memory-effects
     src = @eval Module() begin
         mutable struct Foo; x; end
@@ -1051,8 +1049,7 @@ let
     @test count(iscall((src, typeassert)), src.code) == 0
 end
 
-let
-    # Test for https://github.com/JuliaLang/julia/issues/43402
+let # Test for https://github.com/JuliaLang/julia/issues/43402
     # Ensure that structs required not used outside of the ccall,
     # still get listed in the ccall_preserves
 
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 06258f52cb69c..b7d75d0be5567 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -219,7 +219,7 @@ let code = Any[
     ]
     ir = make_ircode(code; verify=false)
     ir = Core.Compiler.compact!(ir, true)
-    @test Core.Compiler.verify_ir(ir) == nothing
+    @test Core.Compiler.verify_ir(ir) === nothing
 end
 
 # issue #37919
diff --git a/test/complex.jl b/test/complex.jl
index d798cfe16489c..63304652ee7d8 100644
--- a/test/complex.jl
+++ b/test/complex.jl
@@ -1215,3 +1215,9 @@ end
     @test !iseven(7+0im) && isodd(7+0im)
     @test !iseven(6+1im) && !isodd(7+1im)
 end
+
+@testset "issue #55266" begin
+    for T in (Float16, Float32, Float64)
+        @test isapprox(atanh(1+im*floatmin(T)), Complex{T}(atanh(1+im*big(floatmin(T)))))
+    end
+end
diff --git a/test/copy.jl b/test/copy.jl
index b6ad53600027a..559bf5d3e757a 100644
--- a/test/copy.jl
+++ b/test/copy.jl
@@ -198,7 +198,7 @@ end
         bar = Bar19921(foo, Dict(foo => 3))
         bar2 = deepcopy(bar)
         @test bar2.foo ∈ keys(bar2.fooDict)
-        @test bar2.fooDict[bar2.foo] != nothing
+        @test bar2.fooDict[bar2.foo] !== nothing
     end
 
     let d = IdDict(rand(2) => rand(2) for i = 1:100)
@@ -282,6 +282,8 @@ end
 
 @testset "`deepcopy` a `GenericCondition`" begin
     a = Base.GenericCondition(ReentrantLock())
+    # Test printing
+    @test repr(a) == "Base.GenericCondition(ReentrantLock())"
     @test !islocked(a.lock)
     lock(a.lock)
     @test islocked(a.lock)
diff --git a/test/core.jl b/test/core.jl
index 8b0328659ae39..b27832209a835 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -34,7 +34,7 @@ for (T, c) in (
         (Core.CodeInfo, []),
         (Core.CodeInstance, [:next, :min_world, :max_world, :inferred, :debuginfo, :ipo_purity_bits, :invoke, :specptr, :specsigflags, :precompile]),
         (Core.Method, [:primary_world, :deleted_world]),
-        (Core.MethodInstance, [:cache, :precompiled]),
+        (Core.MethodInstance, [:cache, :flags]),
         (Core.MethodTable, [:defs, :leafcache, :cache, :max_args]),
         (Core.TypeMapEntry, [:next, :min_world, :max_world]),
         (Core.TypeMapLevel, [:arg1, :targ, :name1, :tname, :list, :any]),
@@ -42,6 +42,7 @@ for (T, c) in (
         (DataType, [:types, :layout]),
         (Core.Memory, []),
         (Core.GenericMemoryRef, []),
+        (Task, [:_state])
     )
     @test Set((fieldname(T, i) for i in 1:fieldcount(T) if Base.isfieldatomic(T, i))) == Set(c)
 end
@@ -1182,7 +1183,7 @@ end
 
 # Make sure that `Module` is not resolved to `Core.Module` during sysimg generation
 # so that users can define their own binding named `Module` in Main.
-@test !Base.isbindingresolved(Main, :Module)
+@test success(`$(Base.julia_cmd()) -e '@assert !Base.isbindingresolved(Main, :Module)'`)
 
 # Module() constructor
 @test names(Module(:anonymous), all = true, imported = true) == [:anonymous]
@@ -5532,9 +5533,6 @@ let a = Base.StringVector(2^17)
     @test sizeof(c) == 0
 end
 
-# issue #53990 / https://github.com/JuliaLang/julia/pull/53896#discussion_r1555087951
-@test Base.StringVector(UInt64(2)) isa Vector{UInt8}
-
 @test_throws ArgumentError eltype(Bottom)
 
 # issue #16424, re-evaluating type definitions
@@ -5613,6 +5611,26 @@ end
     x::Array{T} where T<:Integer
 end
 
+# issue #54757, type redefinitions with recursive reference in supertype
+struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+
+struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A,N}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+
+@test_throws ErrorException struct T54757{A>:Int,N} <: AbstractArray{Tuple{X,Tuple{Vararg},Union{T54757{Union{X,Integer}},T54757{A}},Vararg{Y,N}} where {X,Y<:T54757}, N}
+    x::A
+    y::Union{A,T54757{A,N}}
+    z::T54757{A}
+end
+
+
 let a = Vector{Core.TypeofBottom}(undef, 2)
     @test a[1] == Union{}
     @test a == [Union{}, Union{}]
@@ -7033,7 +7051,7 @@ translate27368(::Type{Val{name}}) where {name} =
 # issue #27456
 @inline foo27456() = try baz_nonexistent27456(); catch; nothing; end
 bar27456() = foo27456()
-@test bar27456() == nothing
+@test bar27456() === nothing
 
 # issue #27365
 mutable struct foo27365
@@ -7498,6 +7516,13 @@ struct A43411{S, T}
 end
 @test isbitstype(A43411{(:a,), Tuple{Int}})
 
+# issue #55189
+struct A55189{N}
+    children::NTuple{N,A55189{N}}
+end
+@test fieldtype(A55189{2}, 1) === Tuple{A55189{2}, A55189{2}}
+@assert !isbitstype(A55189{2})
+
 # issue #44614
 struct T44614_1{T}
     m::T
@@ -7568,7 +7593,7 @@ end
 # issue #31696
 foo31696(x::Int8, y::Int8) = 1
 foo31696(x::T, y::T) where {T <: Int8} = 2
-@test length(methods(foo31696)) == 1
+@test length(methods(foo31696)) == 2
 let T1 = Tuple{Int8}, T2 = Tuple{T} where T<:Int8, a = T1[(1,)], b = T2[(1,)]
     b .= a
     @test b[1] == (1,)
@@ -8161,7 +8186,7 @@ let M = @__MODULE__
     @test_throws(ErrorException("cannot set type for global $(nameof(M)).a_typed_global. It already has a value or is already set to a different type."),
                  Core.eval(M, :(global a_typed_global::$(Union{Nothing,Tuple{Union{Integer,Nothing}}}))))
     @test Core.eval(M, :(global a_typed_global)) === nothing
-    @test Core.get_binding_type(M, :a_typed_global) === Tuple{Union{Integer,Nothing}}
+    @test Core.get_binding_type(M, :a_typed_global) == Tuple{Union{Integer,Nothing}}
 end
 
 @test Base.unsafe_convert(Ptr{Int}, [1]) !== C_NULL
@@ -8253,3 +8278,29 @@ let load_path = mktempdir()
         end
     end
 end
+
+# merging va tuple unions
+@test Tuple === Union{Tuple{},Tuple{Any,Vararg}}
+@test Tuple{Any,Vararg} === Union{Tuple{Any},Tuple{Any,Any,Vararg}}
+@test Core.Compiler.return_type(Base.front, Tuple{Tuple{Int,Vararg{Int}}}) === Tuple{Vararg{Int}}
+@test Tuple{Vararg{Int}} === Union{Tuple{Int}, Tuple{}, Tuple{Int, Int, Vararg{Int}}}
+@test (Tuple{Vararg{T}} where T) === (Union{Tuple{T, T, Vararg{T}}, Tuple{}, Tuple{T}} where T)
+@test_broken (Tuple{Vararg{T}} where T) === Union{Tuple{T, T, Vararg{T}} where T, Tuple{}, Tuple{T} where T}
+
+@test sizeof(Pair{Union{typeof(Union{}),Nothing}, Union{Type{Union{}},Nothing}}(Union{}, Union{})) == 2
+
+# Make sure that Core.Compiler has enough NamedTuple infrastructure
+# to properly give error messages for basic kwargs...
+Core.eval(Core.Compiler, quote issue50174(;a=1) = a end)
+@test_throws MethodError Core.Compiler.issue50174(;b=2)
+
+let s = mktemp() do path, io
+        xxx = 42
+        redirect_stdout(io) do
+            Base.@assume_effects :nothrow @show xxx
+        end
+        flush(io)
+        read(path, String)
+    end
+    @test strip(s) == "xxx = 42"
+end
diff --git a/test/corelogging.jl b/test/corelogging.jl
index 778e70aecd406..b8cd3716cad2e 100644
--- a/test/corelogging.jl
+++ b/test/corelogging.jl
@@ -140,9 +140,9 @@ end
     end
     @test length(logger.logs) == 1
     record = logger.logs[1]
-    @test record._module == nothing
-    @test record.file == nothing
-    @test record.line == nothing
+    @test record._module === nothing
+    @test record.file === nothing
+    @test record.line === nothing
 end
 
 # PR #28209
diff --git a/test/dict.jl b/test/dict.jl
index ca8a598de0b81..909afb3607907 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -8,7 +8,7 @@ using Random
     @test isequal(p,10=>20)
     @test iterate(p)[1] == 10
     @test iterate(p, iterate(p)[2])[1] == 20
-    @test iterate(p, iterate(p, iterate(p)[2])[2]) == nothing
+    @test iterate(p, iterate(p, iterate(p)[2])[2]) === nothing
     @test firstindex(p) == 1
     @test lastindex(p) == length(p) == 2
     @test Base.indexed_iterate(p, 1, nothing) == (10,2)
@@ -683,9 +683,9 @@ end
     @inferred setindex!(d, -1, 10)
     @test d[10] == -1
     @test 1 == @inferred d[1]
-    @test get(d, -111, nothing) == nothing
+    @test get(d, -111, nothing) === nothing
     @test 1 == @inferred get(d, 1, 1)
-    @test pop!(d, -111, nothing) == nothing
+    @test pop!(d, -111, nothing) === nothing
     @test 1 == @inferred pop!(d, 1)
 
     # get! and delete!
@@ -1049,7 +1049,7 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep
 
     # issue #26939
     d26939 = WeakKeyDict()
-    (@noinline d -> d[big"1.0" + 1.1] = 1)(d26939)
+    (@noinline d -> d[big"1" + 1] = 1)(d26939)
     GC.gc() # primarily to make sure this doesn't segfault
     @test count(d26939) == 0
     @test length(d26939.ht) == 1
@@ -1510,9 +1510,9 @@ end
 for T in (Int, Float64, String, Symbol)
     @testset let T=T
         @test !Core.Compiler.is_consistent(Base.infer_effects(getindex, (Dict{T,Any}, T)))
-        @test_broken Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+        @test Core.Compiler.is_effect_free(Base.infer_effects(getindex, (Dict{T,Any}, T)))
         @test !Core.Compiler.is_nothrow(Base.infer_effects(getindex, (Dict{T,Any}, T)))
-        @test_broken Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T)))
+        @test Core.Compiler.is_terminates(Base.infer_effects(getindex, (Dict{T,Any}, T)))
     end
 end
 
diff --git a/test/docs.jl b/test/docs.jl
index f62f7f8b63b2c..92d45fe05e397 100644
--- a/test/docs.jl
+++ b/test/docs.jl
@@ -4,6 +4,7 @@ import Base.Docs: meta, @var, DocStr, parsedoc
 
 # check that @doc can work before REPL is loaded
 @test !startswith(read(`$(Base.julia_cmd()) -E '@doc sin'`, String), "nothing")
+@test !startswith(read(`$(Base.julia_cmd()) -E '@doc @time'`, String), "nothing")
 
 using Markdown
 using REPL
@@ -574,8 +575,8 @@ end
 
 let T = meta(DocVars)[@var(DocVars.T)],
     S = meta(DocVars)[@var(DocVars.S)],
-    Tname = Markdown.parse("```\n$(curmod_prefix)DocVars.T\n```"),
-    Sname = Markdown.parse("```\n$(curmod_prefix)DocVars.S\n```")
+    Tname = Markdown.parse("```julia\n$(curmod_prefix)DocVars.T\n```"),
+    Sname = Markdown.parse("```julia\n$(curmod_prefix)DocVars.S\n```")
     # Splicing the expression directly doesn't work
     @test docstrings_equal(T.docs[Union{}],
         doc"""
diff --git a/test/errorshow.jl b/test/errorshow.jl
index 80352ddeaa9cf..db22fea1131d1 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -10,7 +10,8 @@ Base.Experimental.register_error_hint(Base.noncallable_number_hint_handler, Meth
 Base.Experimental.register_error_hint(Base.string_concatenation_hint_handler, MethodError)
 Base.Experimental.register_error_hint(Base.methods_on_iterable, MethodError)
 Base.Experimental.register_error_hint(Base.nonsetable_type_hint_handler, MethodError)
-Base.Experimental.register_error_hint(Base.fielderror_hint_handler, FieldError)
+Base.Experimental.register_error_hint(Base.fielderror_listfields_hint_handler, FieldError)
+Base.Experimental.register_error_hint(Base.fielderror_dict_hint_handler, FieldError)
 
 @testset "SystemError" begin
     err = try; systemerror("reason", Cint(0)); false; catch ex; ex; end::SystemError
@@ -738,8 +739,7 @@ end
 pop!(Base.Experimental._hint_handlers[DomainError])  # order is undefined, don't copy this
 
 struct ANumber <: Number end
-let err_str
-    err_str = @except_str ANumber()(3 + 4) MethodError
+let err_str = @except_str ANumber()(3 + 4) MethodError
     @test occursin("objects of type $(curmod_prefix)ANumber are not callable", err_str)
     @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
     # issue 40478
@@ -747,22 +747,25 @@ let err_str
     @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
 end
 
-let err_str
-    a = [1 2; 3 4];
+let a = [1 2; 3 4];
     err_str = @except_str (a[1][2] = 5) MethodError
     @test occursin("\nAre you trying to index into an array? For multi-dimensional arrays, separate the indices with commas: ", err_str)
     @test occursin("a[1, 2]", err_str)
     @test occursin("rather than a[1][2]", err_str)
 end
 
-let err_str
-    d = Dict
+let d = Dict
     err_str = @except_str (d[1] = 5) MethodError
     @test occursin("\nYou attempted to index the type Dict, rather than an instance of the type. Make sure you create the type using its constructor: ", err_str)
     @test occursin("d = Dict([...])", err_str)
     @test occursin(" rather than d = Dict", err_str)
 end
 
+let s = Some("foo")
+    err_str = @except_str (s[] = "bar") MethodError
+    @test !occursin("You attempted to index the type String", err_str)
+end
+
 # Execute backtrace once before checking formatting, see #38858
 backtrace()
 
@@ -808,12 +811,13 @@ end
 @test_throws ArgumentError("invalid index: \"foo\" of type String") [1]["foo"]
 @test_throws ArgumentError("invalid index: nothing of type Nothing") [1][nothing]
 
-# issue #53618
-@testset "FieldErrorHint" begin
+# issue #53618, pr #55165
+@testset "FieldErrorHints" begin
     struct FieldFoo
         a::Float32
         b::Int
     end
+    Base.propertynames(foo::FieldFoo) = (:a, :x, :y)
 
     s = FieldFoo(1, 2)
 
@@ -823,7 +827,9 @@ end
 
     # Check error message first
     errorMsg = sprint(Base.showerror, ex)
-    @test occursin("FieldError: type FieldFoo has no field c", errorMsg)
+    @test occursin("FieldError: type FieldFoo has no field `c`", errorMsg)
+    @test occursin("available fields: `a`, `b`", errorMsg)
+    @test occursin("Available properties: `x`, `y`", errorMsg)
 
     d = Dict(s => 1)
 
@@ -840,7 +846,7 @@ end
     ex = test.value::FieldError
 
     errorMsg = sprint(Base.showerror, ex)
-    @test occursin("FieldError: type Dict has no field c", errorMsg)
+    @test occursin("FieldError: type Dict has no field `c`", errorMsg)
     # Check hint message
     hintExpected = "Did you mean to access dict values using key: `:c` ? Consider using indexing syntax dict[:c]\n"
     @test occursin(hintExpected, errorMsg)
@@ -1079,6 +1085,12 @@ let err_str
     @test occursin("String concatenation is performed with *", err_str)
 end
 
+# https://github.com/JuliaLang/julia/issues/55745
+let err_str
+    err_str = @except_str +() MethodError
+    @test !occursin("String concatenation is performed with *", err_str)
+end
+
 struct MissingLength; end
 struct MissingSize; end
 Base.IteratorSize(::Type{MissingSize}) = Base.HasShape{2}()
diff --git a/test/file.jl b/test/file.jl
index 8c5c3ebc4b74b..a4262c4eaaa21 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -442,8 +442,7 @@ end
                 for pth in ("afile",
                             joinpath("afile", "not_file"),
                             SubString(joinpath(dir, "afile")),
-                            Base.RawFD(-1),
-                            -1)
+                            Base.RawFD(-1))
                     test_stat_error(stat, pth)
                     test_stat_error(lstat, pth)
                 end
@@ -824,6 +823,303 @@ mktempdir() do tmpdir
     rm(b_tmpdir)
 end
 
+@testset "rename" begin
+    # some of the windows specific behavior may be fixed in new versions of julia
+    mktempdir() do dir
+        # see if can make symlinks
+        local can_symlink = try
+            symlink("foo", joinpath(dir, "link"))
+            rm(joinpath(dir, "link"))
+            true
+        catch
+            false
+        end
+        local f1 = joinpath(dir, "file1")
+        local f2 = joinpath(dir, "file2")
+        local d1 = joinpath(dir, "dir1")
+        local d2 = joinpath(dir, "dir2")
+        local subd1f1 = joinpath(d1, "file1")
+        local subd1f2 = joinpath(d1, "file2")
+        local subd2f1 = joinpath(d2, "file1")
+        local subd2f2 = joinpath(d2, "file2")
+        local h1 = joinpath(dir, "hlink1")
+        local h2 = joinpath(dir, "hlink2")
+        local s1 = joinpath(dir, "slink1")
+        local s2 = joinpath(dir, "slink2")
+        @testset "renaming to non existing newpath in same directory" begin
+            # file, make sure isexecutable is copied
+            for mode in (0o644, 0o755)
+                write(f1, b"data")
+                chmod(f1, mode)
+                Base.rename(f1, f2)
+                @test !isfile(f1)
+                @test isfile(f2)
+                @test read(f2) == b"data"
+                if mode == 0o644
+                    @test !isexecutable(f2)
+                else
+                    @test isexecutable(f2)
+                end
+                rm(f2)
+            end
+            # empty directory
+            mkdir(d1)
+            Base.rename(d1, d2)
+            @test !isdir(d1)
+            @test isdir(d2)
+            @test isempty(readdir(d2))
+            rm(d2)
+            # non empty directory
+            mkdir(d1)
+            write(subd1f1, b"data")
+            chmod(subd1f1, 0o644)
+            write(subd1f2, b"exe")
+            chmod(subd1f2, 0o755)
+            Base.rename(d1, d2)
+            @test !isdir(d1)
+            @test isdir(d2)
+            @test read(subd2f1) == b"data"
+            @test read(subd2f2) == b"exe"
+            @test !isexecutable(subd2f1)
+            @test isexecutable(subd2f2)
+            rm(d2; recursive=true)
+            # hardlink
+            write(f1, b"data")
+            hardlink(f1, h1)
+            Base.rename(h1, h2)
+            @test isfile(f1)
+            @test !isfile(h1)
+            @test isfile(h2)
+            @test read(h2) == b"data"
+            write(h2, b"data2")
+            @test read(f1) == b"data2"
+            rm(h2)
+            rm(f1)
+            # symlink
+            if can_symlink
+                symlink("foo", s1)
+                Base.rename(s1, s2)
+                @test !islink(s1)
+                @test islink(s2)
+                @test readlink(s2) == "foo"
+                rm(s2)
+            end
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+
+        # Get the error code from failed rename, or nothing if it worked
+        function rename_errorcodes(oldpath, newpath)
+            try
+                Base.rename(oldpath, newpath)
+                nothing
+            catch e
+                e.code
+            end
+        end
+        @testset "errors" begin
+            # invalid paths
+            @test_throws ArgumentError Base.rename(f1*"\0", "")
+            @test Base.UV_ENOENT == rename_errorcodes("", "")
+            write(f1, b"data")
+            @test Base.UV_ENOENT == rename_errorcodes(f1, "")
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes("", f1)
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes(f2, f1)
+            @test read(f1) == b"data"
+            @test Base.UV_ENOENT == rename_errorcodes(f1, subd1f1)
+            @test read(f1) == b"data"
+            rm(f1)
+            # attempt to make a directory a subdirectory of itself
+            mkdir(d1)
+            if Sys.iswindows()
+                @test rename_errorcodes(d1, joinpath(d1, "subdir")) ∈ (Base.UV_EINVAL, Base.UV_EBUSY)
+            else
+                @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(d1, "subdir"))
+            end
+            rm(d1)
+            # rename to child of a file
+            mkdir(d1)
+            write(f2, "foo")
+            if Sys.iswindows()
+                @test Base.UV_EINVAL == rename_errorcodes(d1, joinpath(f2, "subdir"))
+            else
+                @test Base.UV_ENOTDIR == rename_errorcodes(d1, joinpath(f2, "subdir"))
+            end
+            # replace a file with a directory
+            if !Sys.iswindows()
+                @test Base.UV_ENOTDIR == rename_errorcodes(d1, f2)
+            else
+                # this should work on windows
+                Base.rename(d1, f2)
+                @test isdir(f2)
+                @test !ispath(d1)
+            end
+            rm(f2; force=true)
+            rm(d1; force=true)
+            # symlink loop
+            if can_symlink
+                symlink(s1, s2)
+                symlink(s2, s1)
+                @test Base.UV_ELOOP == rename_errorcodes(joinpath(s1, "foo"), f2)
+                write(f2, b"data")
+                @test Base.UV_ELOOP == rename_errorcodes(f2, joinpath(s1, "foo"))
+                rm(s1)
+                rm(s2)
+                rm(f2)
+            end
+            # newpath is a nonempty directory
+            mkdir(d1)
+            mkdir(d2)
+            write(subd2f1, b"data")
+            write(f1, b"otherdata")
+            if Sys.iswindows()
+                @test Base.UV_EACCES == rename_errorcodes(f1, d1)
+                @test Base.UV_EACCES == rename_errorcodes(f1, d2)
+                @test Base.UV_EACCES == rename_errorcodes(d1, d2)
+                @test Base.UV_EACCES == rename_errorcodes(subd2f1, d2)
+            else
+                @test Base.UV_EISDIR == rename_errorcodes(f1, d1)
+                @test Base.UV_EISDIR == rename_errorcodes(f1, d2)
+                @test rename_errorcodes(d1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST)
+                @test rename_errorcodes(subd2f1, d2) ∈ (Base.UV_ENOTEMPTY, Base.UV_EEXIST, Base.UV_EISDIR)
+            end
+            rm(f1)
+            rm(d1)
+            rm(d2; recursive=true)
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+
+        @testset "replacing existing file" begin
+            write(f2, b"olddata")
+            chmod(f2, 0o755)
+            write(f1, b"newdata")
+            chmod(f1, 0o644)
+            @test isexecutable(f2)
+            @test !isexecutable(f1)
+            Base.rename(f1, f2)
+            @test !ispath(f1)
+            @test read(f2) == b"newdata"
+            @test !isexecutable(f2)
+            rm(f2)
+        end
+
+        @testset "replacing file with itself" begin
+            write(f1, b"data")
+            Base.rename(f1, f1)
+            @test read(f1) == b"data"
+            hardlink(f1, h1)
+            Base.rename(f1, h1)
+            if Sys.iswindows()
+                # On Windows f1 gets deleted
+                @test !ispath(f1)
+            else
+                @test read(f1) == b"data"
+            end
+            @test read(h1) == b"data"
+            rm(h1)
+            rm(f1; force=true)
+        end
+
+        @testset "replacing existing file in different directories" begin
+            mkdir(d1)
+            mkdir(d2)
+            write(subd2f2, b"olddata")
+            chmod(subd2f2, 0o755)
+            write(subd1f1, b"newdata")
+            chmod(subd1f1, 0o644)
+            @test isexecutable(subd2f2)
+            @test !isexecutable(subd1f1)
+            Base.rename(subd1f1, subd2f2)
+            @test !ispath(subd1f1)
+            @test read(subd2f2) == b"newdata"
+            @test !isexecutable(subd2f2)
+            @test isdir(d1)
+            @test isdir(d2)
+            rm(d1; recursive=true)
+            rm(d2; recursive=true)
+        end
+
+        @testset "rename with open files" begin
+            # both open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f1) do handle1
+                open(f2) do handle2
+                    if Sys.iswindows()
+                        # currently this doesn't work on windows
+                        @test Base.UV_EBUSY == rename_errorcodes(f1, f2)
+                    else
+                        Base.rename(f1, f2)
+                        @test !ispath(f1)
+                        @test read(f2) == b"newdata"
+                    end
+                    # rename doesn't break already opened files
+                    @test read(handle1) == b"newdata"
+                    @test read(handle2) == b"olddata"
+                end
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+
+            # oldpath open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f1) do handle1
+                if Sys.iswindows()
+                    # currently this doesn't work on windows
+                    @test Base.UV_EBUSY == rename_errorcodes(f1, f2)
+                else
+                    Base.rename(f1, f2)
+                    @test !ispath(f1)
+                    @test read(f2) == b"newdata"
+                end
+                # rename doesn't break already opened files
+                @test read(handle1) == b"newdata"
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+
+            # newpath open
+            write(f2, b"olddata")
+            write(f1, b"newdata")
+            open(f2) do handle2
+                if Sys.iswindows()
+                    # currently this doesn't work on windows
+                    @test Base.UV_EACCES == rename_errorcodes(f1, f2)
+                else
+                    Base.rename(f1, f2)
+                    @test !ispath(f1)
+                    @test read(f2) == b"newdata"
+                end
+                # rename doesn't break already opened files
+                @test read(handle2) == b"olddata"
+            end
+            rm(f1; force=true)
+            rm(f2; force=true)
+        end
+
+        @testset "replacing empty directory with directory" begin
+            mkdir(d1)
+            mkdir(d2)
+            write(subd1f1, b"data")
+            if Sys.iswindows()
+                # currently this doesn't work on windows
+                @test Base.UV_EACCES == rename_errorcodes(d1, d2)
+                rm(d1; recursive=true)
+                rm(d2)
+            else
+                Base.rename(d1, d2)
+                @test isdir(d2)
+                @test read(subd2f1) == b"data"
+                @test !ispath(d1)
+                rm(d2; recursive=true)
+            end
+        end
+        @test isempty(readdir(dir)) # make sure everything got cleaned up
+    end
+end
+
 # issue #10506 #10434
 ## Tests for directories and links to directories
 if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
@@ -1032,7 +1328,7 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
         @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=false)
         @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=true)
         # mv
-        @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
+        @test_throws Base._UVError("rename($(repr(nonexisting_src)), $(repr(dst)))", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
     end
 end
 
@@ -1473,7 +1769,7 @@ rm(dir)
 
 
 ##################
-# Return values of mkpath, mkdir, cp, mv and touch
+# Return values of mkpath, mkdir, cp, mv, rename and touch
 ####################
 mktempdir() do dir
     name1 = joinpath(dir, "apples")
@@ -1490,8 +1786,11 @@ mktempdir() do dir
     @test cp(name2, name1) == name1
     @test isfile(name1)
     @test isfile(name2)
+    @test Base.rename(name1, name2) == name2
+    @test !ispath(name1)
+    @test isfile(name2)
     namedir = joinpath(dir, "chalk")
-    namepath = joinpath(dir, "chalk","cheese","fresh")
+    namepath = joinpath(dir, "chalk", "cheese", "fresh")
     @test !ispath(namedir)
     @test mkdir(namedir) == namedir
     @test isdir(namedir)
@@ -1500,7 +1799,12 @@ mktempdir() do dir
     @test isdir(namepath)
     @test mkpath(namepath) == namepath
     @test isdir(namepath)
+    # issue 54826
+    namepath_dirpath = joinpath(dir, "x", "y", "z", "")
+    @test mkpath(namepath_dirpath) == namepath_dirpath
 end
+@test mkpath("") == ""
+@test mkpath("/") == "/"
 
 # issue #30588
 @test realpath(".") == realpath(pwd())
@@ -1604,6 +1908,26 @@ end
     end
 end
 
+@testset "pwd tests" begin
+    mktempdir() do dir
+        cd(dir) do
+            withenv("OLDPWD" => nothing) do
+                io = IOBuffer()
+                Base.repl_cmd(@cmd("cd"), io)
+                Base.repl_cmd(@cmd("cd -"), io)
+                @test realpath(pwd()) == realpath(dir)
+                if !Sys.iswindows()
+                    # Delete the working directory and check we can cd out of it
+                    # Cannot delete the working directory on Windows
+                    rm(dir)
+                    @test_throws Base._UVError("pwd()", Base.UV_ENOENT) pwd()
+                    Base.repl_cmd(@cmd("cd \\~"), io)
+                end
+            end
+        end
+    end
+end
+
 @testset "readdir tests" begin
     ≛(a, b) = sort(a) == sort(b)
     mktempdir() do dir
@@ -1749,8 +2073,18 @@ end
     @test s.blocks isa Int64
     @test s.mtime isa Float64
     @test s.ctime isa Float64
+
+    @test s === stat((f,))
+    @test s === lstat((f,))
+    @test s === stat(".", f)
+    @test s === lstat(".", f)
 end
 
+mutable struct URI50890; f::String; end
+Base.joinpath(x::URI50890) = URI50890(x.f)
+@test_throws "stat not implemented" stat(URI50890("."))
+@test_throws "lstat not implemented" lstat(URI50890("."))
+
 @testset "StatStruct show's extended details" begin
     f, io = mktemp()
     s = stat(f)
@@ -1794,6 +2128,16 @@ end
         @test !isnothing(Base.Filesystem.getusername(s.uid))
         @test !isnothing(Base.Filesystem.getgroupname(s.gid))
     end
+    s = Base.Filesystem.StatStruct()
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(\"\" ENOENT: ") && endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for \"\"\n ENOENT: ") && !endswith(stat_show_str_multi, r"\s")
+    s = Base.Filesystem.StatStruct("my/test", Ptr{UInt8}(0), Int32(Base.UV_ENOTDIR))
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(\"my/test\" ENOTDIR: ") && endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for \"my/test\"\n ENOTDIR: ") && !endswith(stat_show_str_multi, r"\s")
 end
 
 @testset "diskstat() works" begin
diff --git a/test/filesystem.jl b/test/filesystem.jl
index 870350dee9f35..036a3dda30cca 100644
--- a/test/filesystem.jl
+++ b/test/filesystem.jl
@@ -44,7 +44,7 @@ end
 @testset "Base.Filesystem docstrings" begin
     undoc = Docs.undocumented_names(Base.Filesystem)
     @test_broken isempty(undoc)
-    @test undoc == [:File, :Filesystem, :cptree, :futime, :rename, :sendfile, :unlink]
+    @test undoc == [:File, :Filesystem, :cptree, :futime, :sendfile, :unlink]
 end
 
 @testset "write return type" begin
diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl
index f33ec75b58322..d5d697634bcfa 100644
--- a/test/floatfuncs.jl
+++ b/test/floatfuncs.jl
@@ -257,6 +257,35 @@ end
     end
 end
 
+@testset "isapprox and unsigned integers" begin
+    for T in Base.BitUnsigned_types
+        # Test also combinations of different integer types
+        W = widen(T)
+        # The order of the operands for difference between unsigned integers is
+        # very important, test both combinations.
+        @test isapprox(T(42), T(42); rtol=T(0), atol=0.5)
+        @test isapprox(T(42), W(42); rtol=T(0), atol=0.5)
+        @test !isapprox(T(0), T(1); rtol=T(0), atol=0.5)
+        @test !isapprox(T(1), T(0); rtol=T(0), atol=0.5)
+        @test isapprox(T(1), T(3); atol=T(2))
+        @test isapprox(T(4), T(2); atol=T(2))
+        @test isapprox(T(1), W(3); atol=T(2))
+        @test isapprox(T(4), W(2); atol=T(2))
+        @test isapprox(T(5), T(7); atol=typemax(T))
+        @test isapprox(T(8), T(6); atol=typemax(T))
+        @test isapprox(T(1), T(2); rtol=1)
+        @test isapprox(T(6), T(3); rtol=1)
+        @test isapprox(T(1), W(2); rtol=1)
+        @test isapprox(T(6), W(3); rtol=1)
+        @test !isapprox(typemin(T), typemax(T))
+        @test !isapprox(typemax(T), typemin(T))
+        @test !isapprox(typemin(T), typemax(T); atol=typemax(T)-T(1))
+        @test !isapprox(typemax(T), typemin(T); atol=typemax(T)-T(1))
+        @test isapprox(typemin(T), typemax(T); atol=typemax(T))
+        @test isapprox(typemax(T), typemin(T); atol=typemax(T))
+    end
+end
+
 @testset "Conversion from floating point to unsigned integer near extremes (#51063)" begin
     @test_throws InexactError UInt32(4.2949673f9)
     @test_throws InexactError UInt64(1.8446744f19)
diff --git a/test/functional.jl b/test/functional.jl
index 3436fb8911cc1..84c4098308ebd 100644
--- a/test/functional.jl
+++ b/test/functional.jl
@@ -235,3 +235,129 @@ end
 let (:)(a,b) = (i for i in Base.:(:)(1,10) if i%2==0)
     @test Int8[ i for i = 1:2 ] == [2,4,6,8,10]
 end
+
+@testset "Basic tests of Fix1, Fix2, and Fix" begin
+    function test_fix1(Fix1=Base.Fix1)
+        increment = Fix1(+, 1)
+        @test increment(5) == 6
+        @test increment(-1) == 0
+        @test increment(0) == 1
+        @test map(increment, [1, 2, 3]) == [2, 3, 4]
+
+        concat_with_hello = Fix1(*, "Hello ")
+        @test concat_with_hello("World!") == "Hello World!"
+        # Make sure inference is good:
+        @inferred concat_with_hello("World!")
+
+        one_divided_by = Fix1(/, 1)
+        @test one_divided_by(10) == 1/10.0
+        @test one_divided_by(-5) == 1/-5.0
+
+        return nothing
+    end
+
+    function test_fix2(Fix2=Base.Fix2)
+        return_second = Fix2((x, y) -> y, 999)
+        @test return_second(10) == 999
+        @inferred return_second(10)
+        @test return_second(-5) == 999
+
+        divide_by_two = Fix2(/, 2)
+        @test map(divide_by_two, (2, 4, 6)) == (1.0, 2.0, 3.0)
+        @inferred map(divide_by_two, (2, 4, 6))
+
+        concat_with_world = Fix2(*, " World!")
+        @test concat_with_world("Hello") == "Hello World!"
+        @inferred concat_with_world("Hello World!")
+
+        return nothing
+    end
+
+    # Test with normal Base.Fix1 and Base.Fix2
+    test_fix1()
+    test_fix2()
+
+    # Now, repeat the Fix1 and Fix2 tests, but
+    # with a Fix lambda function used in their place
+    test_fix1((op, arg) -> Base.Fix{1}(op, arg))
+    test_fix2((op, arg) -> Base.Fix{2}(op, arg))
+
+    # Now, we do more complex tests of Fix:
+    let Fix=Base.Fix
+        @testset "Argument Fixation" begin
+            let f = (x, y, z) -> x + y * z
+                fixed_f1 = Fix{1}(f, 10)
+                @test fixed_f1(2, 3) == 10 + 2 * 3
+
+                fixed_f2 = Fix{2}(f, 5)
+                @test fixed_f2(1, 4) == 1 + 5 * 4
+
+                fixed_f3 = Fix{3}(f, 3)
+                @test fixed_f3(1, 2) == 1 + 2 * 3
+            end
+        end
+        @testset "Helpful errors" begin
+            let g = (x, y) -> x - y
+                # Test minimum N
+                fixed_g1 = Fix{1}(g, 100)
+                @test fixed_g1(40) == 100 - 40
+
+                # Test maximum N
+                fixed_g2 = Fix{2}(g, 100)
+                @test fixed_g2(150) == 150 - 100
+
+                # One over
+                fixed_g3 = Fix{3}(g, 100)
+                @test_throws ArgumentError("expected at least 2 arguments to `Fix{3}`, but got 1") fixed_g3(1)
+            end
+        end
+        @testset "Type Stability and Inference" begin
+            let h = (x, y) -> x / y
+                fixed_h = Fix{2}(h, 2.0)
+                @test @inferred(fixed_h(4.0)) == 2.0
+            end
+        end
+        @testset "Interaction with varargs" begin
+            vararg_f = (x, y, z...) -> x + 10 * y + sum(z; init=zero(x))
+            fixed_vararg_f = Fix{2}(vararg_f, 6)
+
+            # Can call with variable number of arguments:
+            @test fixed_vararg_f(1, 2, 3, 4) == 1 + 10 * 6 + sum((2, 3, 4))
+            @inferred fixed_vararg_f(1, 2, 3, 4)
+            @test fixed_vararg_f(5) == 5 + 10 * 6
+            @inferred fixed_vararg_f(5)
+        end
+        @testset "Errors should propagate normally" begin
+            error_f = (x, y) -> sin(x * y)
+            fixed_error_f = Fix{2}(error_f, Inf)
+            @test_throws DomainError fixed_error_f(10)
+        end
+        @testset "Chaining Fix together" begin
+            f1 = Fix{1}(*, "1")
+            f2 = Fix{1}(f1, "2")
+            f3 = Fix{1}(f2, "3")
+            @test f3() == "123"
+
+            g1 = Fix{2}(*, "1")
+            g2 = Fix{2}(g1, "2")
+            g3 = Fix{2}(g2, "3")
+            @test g3("") == "123"
+        end
+        @testset "Zero arguments" begin
+            f = Fix{1}(x -> x, 'a')
+            @test f() == 'a'
+        end
+        @testset "Dummy-proofing" begin
+            @test_throws ArgumentError("expected `N` in `Fix{N}` to be integer greater than 0, but got 0") Fix{0}(>, 1)
+            @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `0.5::Float64`") Fix{0.5}(>, 1)
+            @test_throws ArgumentError("expected type parameter in `Fix` to be `Int`, but got `1::UInt64`") Fix{UInt64(1)}(>, 1)
+        end
+        @testset "Specialize to structs not in `Base`" begin
+            struct MyStruct
+                x::Int
+            end
+            f = Fix{1}(MyStruct, 1)
+            @test f isa Fix{1,Type{MyStruct},Int}
+        end
+    end
+end
diff --git a/test/gc.jl b/test/gc.jl
index f924f4952cfb0..c532f17f04eb5 100644
--- a/test/gc.jl
+++ b/test/gc.jl
@@ -49,6 +49,13 @@ function issue_54275_test()
     @test !live_bytes_has_grown_too_much
 end
 
+function full_sweep_reasons_test()
+    GC.gc()
+    reasons = Base.full_sweep_reasons()
+    @test reasons[:FULL_SWEEP_REASON_FORCED_FULL_SWEEP] >= 1
+    @test keys(reasons) == Set(Base.FULL_SWEEP_REASONS)
+end
+
 # !!! note:
 #     Since we run our tests on 32bit OS as well we confine ourselves
 #     to parameters that allocate about 512MB of objects. Max RSS is lower
@@ -72,3 +79,21 @@ end
 @testset "Base.GC docstrings" begin
     @test isempty(Docs.undocumented_names(GC))
 end
+
+@testset "Full GC reasons" begin
+    full_sweep_reasons_test()
+end
+
+#testset doesn't work here because this needs to run in top level
+#Check that we ensure objects in toplevel exprs are rooted
+global dims54422 = [] # allocate the Binding
+GC.gc(); GC.gc(); # force the binding to be old
+GC.enable(false); # prevent new objects from being old
+@eval begin
+    Base.Experimental.@force_compile # use the compiler
+    dims54422 = $([])
+    nothing
+end
+GC.enable(true); GC.gc(false) # incremental collection
+@test typeof(dims54422) == Vector{Any}
+@test isempty(dims54422)
diff --git a/test/generic_map_tests.jl b/test/generic_map_tests.jl
index b155370dd6465..7f19d60fe31fb 100644
--- a/test/generic_map_tests.jl
+++ b/test/generic_map_tests.jl
@@ -43,7 +43,7 @@ function generic_map_tests(mapf, inplace_mapf=nothing)
     @test mapf(f, Int[], Int[], Complex{Int}[]) == Union{}[]
 
     # In-place map
-    if inplace_mapf != nothing
+    if inplace_mapf !== nothing
         A = Float64[1:10...]
         inplace_mapf(x -> x*x, A, A)
         @test A == map(x -> x*x, Float64[1:10...])
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
index deb1dd10681e8..6f1bde69dddfe 100644
--- a/test/intfuncs.jl
+++ b/test/intfuncs.jl
@@ -616,3 +616,20 @@ end
 @test Base.infer_effects(gcdx, (Int,Int)) |> Core.Compiler.is_foldable
 @test Base.infer_effects(invmod, (Int,Int)) |> Core.Compiler.is_foldable
 @test Base.infer_effects(binomial, (Int,Int)) |> Core.Compiler.is_foldable
+
+@testset "literal power" begin
+    @testset for T in Base.uniontypes(Base.HWReal)
+        ns = (T(0), T(1), T(5))
+        if T <: AbstractFloat
+            ns = (ns..., T(3.14), T(-2.71))
+        end
+        for n in ns
+            @test n ^ 0 === T(1)
+            @test n ^ 1 === n
+            @test n ^ 2 === n * n
+            @test n ^ 3 === n * n * n
+            @test n ^ -1 ≈ inv(n)
+            @test n ^ -2 ≈ inv(n) * inv(n)
+        end
+    end
+end
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index e61354fe4f7f3..7a63cd1c0a62e 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -197,8 +197,8 @@ for order in (:not_atomic, :monotonic, :acquire, :release, :acquire_release, :se
     @test (order -> Core.Intrinsics.atomic_fence(order))(order) === nothing
     @test Base.invokelatest(@eval () -> Core.Intrinsics.atomic_fence($(QuoteNode(order)))) === nothing
 end
-@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) == nothing
-@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) == nothing
+@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) === nothing
+@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) === nothing
 
 primitive type Int256 <: Signed 256 end
 Int256(i::Int) = Core.Intrinsics.sext_int(Int256, i)
diff --git a/test/iobuffer.jl b/test/iobuffer.jl
index 0e74595d29d20..b5b34a2dbed8c 100644
--- a/test/iobuffer.jl
+++ b/test/iobuffer.jl
@@ -351,7 +351,7 @@ end
     a = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int))
     mark(a) # mark at position 0
     write(a, "Hello!")
-    @test Base.compact(a) == nothing # because pointer > mark
+    @test Base.compact(a) === nothing # because pointer > mark
     close(a)
     b = Base.GenericIOBuffer(UInt8[], true, true, false, true, typemax(Int))
     write(b, "Hello!")
diff --git a/test/iostream.jl b/test/iostream.jl
index 4ba2423f0f558..13d01e61bbf8c 100644
--- a/test/iostream.jl
+++ b/test/iostream.jl
@@ -190,3 +190,7 @@ end
     @test all(T -> T <: Union{UInt, Int}, Base.return_types(unsafe_write, (IO, Ptr{UInt8}, UInt)))
     @test all(T -> T === Bool, Base.return_types(eof, (IO,)))
 end
+
+@testset "fd" begin
+    @test open(fd, tempname(), "w") isa RawFD
+end
diff --git a/test/iterators.jl b/test/iterators.jl
index 95275195cd7c0..0df4d9afd371a 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -499,7 +499,7 @@ end
 @test Base.IteratorSize(product(1:2, countfrom(1))) == Base.IsInfinite()
 
 @test Base.iterate(product()) == ((), true)
-@test Base.iterate(product(), 1) == nothing
+@test Base.iterate(product(), 1) === nothing
 
 # intersection
 @test intersect(product(1:3, 4:6), product(2:4, 3:5)) == Iterators.ProductIterator((2:3, 4:5))
@@ -993,7 +993,7 @@ end
 end
 
 @testset "Iterators.peel" begin
-    @test Iterators.peel([]) == nothing
+    @test Iterators.peel([]) === nothing
     @test Iterators.peel(1:10)[1] == 1
     @test Iterators.peel(1:10)[2] |> collect == 2:10
     @test Iterators.peel(x^2 for x in 2:4)[1] == 4
diff --git a/test/llvmcall.jl b/test/llvmcall.jl
index 98968bfcdf8bc..c83ac05b1ec48 100644
--- a/test/llvmcall.jl
+++ b/test/llvmcall.jl
@@ -70,13 +70,13 @@ end
        ret i32 %3""", Int32, Tuple{Int32, Int32},
         Int32(1), Int32(2))) # llvmcall must be compiled to be called
 
-# Test whether declarations work properly
+#Since LLVM 18, LLVM does a best effort to automatically include the intrinsics
 function undeclared_ceil(x::Float64)
     llvmcall("""%2 = call double @llvm.ceil.f64(double %0)
         ret double %2""", Float64, Tuple{Float64}, x)
 end
-@test_throws ErrorException undeclared_ceil(4.2)
-@test_throws ErrorException undeclared_ceil(4.2)
+@test undeclared_ceil(4.2) == 5.0
+@test undeclared_ceil(4.2) == 5.0
 
 function declared_floor(x::Float64)
     llvmcall(
diff --git a/test/llvmpasses/alloc-opt-bits.ll b/test/llvmpasses/alloc-opt-bits.ll
new file mode 100644
index 0000000000000..e19093f46f815
--- /dev/null
+++ b/test/llvmpasses/alloc-opt-bits.ll
@@ -0,0 +1,37 @@
+; This file is a part of Julia. License is MIT: https://julialang.org/license
+
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S %s | FileCheck %s 
+
+
+@tag = external addrspace(10) global {}
+
+@glob = external addrspace(10) global {}
+
+; Test that the gc_preserve intrinsics are deleted directly.
+
+; CHECK-LABEL: @ptr_and_bits
+; CHECK-NOT: alloca 
+; CHECK: call noalias ptr addrspace(10) @julia.gc_alloc_obj
+
+define void @ptr_and_bits(ptr %fptr, i1 %b, i1 %b2, i32 %idx) {
+  %pgcstack = call ptr @julia.get_pgcstack()
+  %ptls = call ptr @julia.ptls_states()
+  %ptls_i8 = bitcast ptr %ptls to ptr
+  %v = call noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 16, ptr addrspace(10) @tag)
+  
+  %g0 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 1
+  store ptr addrspace(10) @glob, ptr addrspace(10) %g0
+  
+  %g1 = getelementptr { i64, ptr addrspace(10) }, ptr addrspace(10) %v, i32 %idx, i32 0
+  store i64 7, ptr addrspace(10) %g1
+
+  %res = load ptr addrspace(10), ptr addrspace(10) %g0
+  %res2 = load i64, ptr addrspace(10) %g1
+  ret void
+}
+
+declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))
+
+declare ptr @julia.ptls_states()
+
+declare ptr @julia.get_pgcstack()
diff --git a/test/llvmpasses/alloc-opt-gcframe.ll b/test/llvmpasses/alloc-opt-gcframe.ll
index 3eaa0a871d029..f53a4d5c01df7 100644
--- a/test/llvmpasses/alloc-opt-gcframe.ll
+++ b/test/llvmpasses/alloc-opt-gcframe.ll
@@ -10,9 +10,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; CHECK-NOT: @julia.gc_alloc_obj
 
 ; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %gcstack, i64 -12
-; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
+; OPAQUE: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
 ; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}})
+; OPAQUE-NEXT: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc(ptr [[ptls_load]], i32 [[SIZE_T:[0-9]+]], i32 16, i64 {{.*}} @tag {{.*}})
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 
 define {} addrspace(10)* @return_obj() {
@@ -27,7 +27,7 @@ define {} addrspace(10)* @return_obj() {
 ; CHECK-LABEL: @return_load
 ; CHECK: alloca i64
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr
 ; CHECK-NOT: @tag
 ; CHECK-NOT: @llvm.lifetime.end
@@ -48,7 +48,7 @@ define i64 @return_load(i64 %i) {
 ; CHECK-LABEL: @ccall_obj
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK: @ijl_gc_pool_alloc
+; CHECK: @ijl_gc_small_alloc
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 define void @ccall_obj(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -65,7 +65,7 @@ define void @ccall_obj(i8* %fptr) {
 ; CHECK: alloca i64
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.lifetime.start{{.*}}(i64 8, ptr
 ; OPAQUE: %f = bitcast ptr %fptr to ptr
 ; Currently the GC frame lowering pass strips away all operand bundles
@@ -88,7 +88,7 @@ define void @ccall_ptr(i8* %fptr) {
 ; CHECK-LABEL: @ccall_unknown_bundle
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK: @ijl_gc_pool_alloc
+; CHECK: @ijl_gc_small_alloc
 ; OPAQUE: store atomic ptr addrspace(10) @tag, ptr addrspace(10) {{.*}} unordered, align 8, !tbaa !4
 define void @ccall_unknown_bundle(i8* %fptr) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -151,7 +151,7 @@ L3:
 ; CHECK-LABEL: @object_field
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4
 define void @object_field({} addrspace(10)* %field) {
   %pgcstack = call {}*** @julia.get_pgcstack()
@@ -169,7 +169,7 @@ define void @object_field({} addrspace(10)* %field) {
 ; CHECK: alloca [16 x i8], align 16
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; OPAQUE: call void @llvm.memcpy.p0.p0.i64
 define void @memcpy_opt(i8* %v22) {
 top:
@@ -187,7 +187,7 @@ top:
 ; CHECK-LABEL: @preserve_opt
 ; OPAQUE: call ptr @julia.get_pgcstack()
 ; CHECK-NOT: @julia.gc_alloc_obj
-; CHECK-NOT: @jl_gc_pool_alloc
+; CHECK-NOT: @jl_gc_small_alloc
 ; CHECK-NOT: @llvm.lifetime.end
 ; CHECK: @external_function
 define void @preserve_opt(i8* %v22) {
@@ -238,7 +238,7 @@ L3:
 }
 ; CHECK-LABEL: }{{$}}
 
-; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_pool_alloc(ptr,
+; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_small_alloc(ptr,
 ; OPAQUE: declare noalias nonnull ptr addrspace(10) @ijl_gc_big_alloc(ptr,
 declare void @external_function()
 declare {}*** @julia.get_pgcstack()
diff --git a/test/llvmpasses/alloc-opt-pipeline.jl b/test/llvmpasses/alloc-opt-pipeline.jl
index 9437913e4054b..e84348ec4a8c6 100644
--- a/test/llvmpasses/alloc-opt-pipeline.jl
+++ b/test/llvmpasses/alloc-opt-pipeline.jl
@@ -17,7 +17,7 @@ end
 
 # CHECK-LABEL: @julia_haszerolayout
 # CHECK: top:
-# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: @jl_gc_small_alloc
 # CHECK: extractelement
 # CHECK: ret i8
 emit(haszerolayout, NTuple{32,VecElement{UInt8}})
diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl
index dd0892be56a0b..3c4c1d491ec28 100644
--- a/test/llvmpasses/fastmath.jl
+++ b/test/llvmpasses/fastmath.jl
@@ -16,29 +16,3 @@ import Base.FastMath
 
 # CHECK: call fast float @llvm.sqrt.f32(float %"x::Float32")
 emit(FastMath.sqrt_fast, Float32)
-
-
-# Float16 operations should be performed as Float32, unless @fastmath is specified
-# TODO: this is not true for platforms that natively support Float16
-
-foo(x::T,y::T) where T = x-y == zero(T)
-# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{
-# CHECK-DAG: %[[XEXT:[0-9]+]] = fpext half %[[X]] to float
-# CHECK-DAG: %[[YEXT:[0-9]+]] = fpext half %[[Y]] to float
-# CHECK: %[[DIFF:[0-9]+]] = fsub float %[[XEXT]], %[[YEXT]]
-# CHECK: %[[TRUNC:[0-9]+]] = fptrunc float %[[DIFF]] to half
-# CHECK: %[[DIFFEXT:[0-9]+]] = fpext half %[[TRUNC]] to float
-# CHECK: %[[CMP:[0-9]+]] = fcmp oeq float %[[DIFFEXT]], 0.000000e+00
-# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8
-# CHECK: ret i8 %[[ZEXT]]
-# CHECK: }
-emit(foo, Float16, Float16)
-
-@fastmath foo(x::T,y::T) where T = x-y == zero(T)
-# CHECK: define {{(swiftcc )?}}i8 @julia_foo_{{[0-9]+}}({{.*}}half %[[X:"x::Float16"]], half %[[Y:"y::Float16"]]) {{.*}}{
-# CHECK: %[[DIFF:[0-9]+]] = fsub fast half %[[X]], %[[Y]]
-# CHECK: %[[CMP:[0-9]+]] = fcmp fast oeq half %[[DIFF]], 0xH0000
-# CHECK: %[[ZEXT:[0-9]+]] = zext i1 %[[CMP]] to i8
-# CHECK: ret i8 %[[ZEXT]]
-# CHECK: }
-emit(foo, Float16, Float16)
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index 705e57af94b7d..f8e123fdc6aea 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -58,7 +58,7 @@ top:
   %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_pool_alloc
+; OPAQUE: %v = call noalias nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) ptr addrspace(10) @ijl_gc_small_alloc
   %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8, i64 12341234)
   %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
   %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
diff --git a/test/llvmpasses/float16.ll b/test/llvmpasses/float16.ll
index 33069c71179ed..d1dfb6aca11dd 100644
--- a/test/llvmpasses/float16.ll
+++ b/test/llvmpasses/float16.ll
@@ -99,7 +99,7 @@ top:
   ret half %13
 }
 
-define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) {
+define bfloat @demote_bfloat_test(bfloat %a, bfloat %b) #2 {
 top:
 ; CHECK-LABEL: @demote_bfloat_test(
 ; CHECK-NEXT:  top:
@@ -160,5 +160,70 @@ top:
   ret bfloat %13
 }
 
-attributes #0 = { "target-features"="-avx512fp16" }
-attributes #1 = { "target-features"="+avx512fp16" }
+define bfloat @native_bfloat_test(bfloat %a, bfloat %b) #3 {
+top:
+; CHECK-LABEL: @native_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %0 = fadd bfloat %a, %b
+; CHECK-NEXT:    %1 = fadd bfloat %0, %b
+; CHECK-NEXT:    %2 = fadd bfloat %1, %b
+; CHECK-NEXT:    %3 = fmul bfloat %2, %b
+; CHECK-NEXT:    %4 = fdiv bfloat %3, %b
+; CHECK-NEXT:    %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+; CHECK-NEXT:    %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+; CHECK-NEXT:    %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+; CHECK-NEXT:    %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+; CHECK-NEXT:    %9 = fadd <2 x bfloat> %6, %8
+; CHECK-NEXT:    %10 = extractelement <2 x bfloat> %9, i32 0
+; CHECK-NEXT:    %11 = extractelement <2 x bfloat> %9, i32 1
+; CHECK-NEXT:    %12 = fadd bfloat %10, %11
+; CHECK-NEXT:    %13 = fadd bfloat %12, %4
+; CHECK-NEXT:    ret bfloat %13
+;
+  %0 = fadd bfloat %a, %b
+  %1 = fadd bfloat %0, %b
+  %2 = fadd bfloat %1, %b
+  %3 = fmul bfloat %2, %b
+  %4 = fdiv bfloat %3, %b
+  %5 = insertelement <2 x bfloat> undef, bfloat %a, i32 0
+  %6 = insertelement <2 x bfloat> %5, bfloat %b, i32 1
+  %7 = insertelement <2 x bfloat> undef, bfloat %b, i32 0
+  %8 = insertelement <2 x bfloat> %7, bfloat %b, i32 1
+  %9 = fadd <2 x bfloat> %6, %8
+  %10 = extractelement <2 x bfloat> %9, i32 0
+  %11 = extractelement <2 x bfloat> %9, i32 1
+  %12 = fadd bfloat %10, %11
+  %13 = fadd bfloat %12, %4
+  ret bfloat %13
+}
+
+define i1 @fast_half_test(half %0, half %1) #0 {
+top:
+; CHECK-LABEL: @fast_half_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast half %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq half %2, 0xH0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast half %0, %1
+  %3 = fcmp fast oeq half %2, 0xH0000
+  ret i1 %3
+}
+
+define i1 @fast_bfloat_test(bfloat %0, bfloat %1) #2 {
+top:
+; CHECK-LABEL: @fast_bfloat_test(
+; CHECK-NEXT:  top:
+; CHECK-NEXT:    %2 = fsub fast bfloat %0, %1
+; CHECK-NEXT:    %3 = fcmp fast oeq bfloat %2, 0xR0000
+; CHECK-NEXT:    ret i1 %3
+;
+  %2 = fsub fast bfloat %0, %1
+  %3 = fcmp fast oeq bfloat %2, 0xR0000
+  ret i1 %3
+}
+
+attributes #0 = { "julia.hasfp16"="false" }
+attributes #1 = { "julia.hasfp16"="true" }
+attributes #2 = { "julia.hasbf16"="false" }
+attributes #3 = { "julia.hasbf16"="true" }
diff --git a/test/llvmpasses/julia-licm-fail.ll b/test/llvmpasses/julia-licm-fail.ll
index 4b39a8f85050b..76ce19af96e94 100644
--- a/test/llvmpasses/julia-licm-fail.ll
+++ b/test/llvmpasses/julia-licm-fail.ll
@@ -73,7 +73,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32, i8*) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-licm-missed.ll b/test/llvmpasses/julia-licm-missed.ll
index 3d4c9112c857b..37a547c9861b7 100644
--- a/test/llvmpasses/julia-licm-missed.ll
+++ b/test/llvmpasses/julia-licm-missed.ll
@@ -86,7 +86,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32, i8*) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll
index 84c990140ce10..732b62788f13c 100644
--- a/test/llvmpasses/julia-licm.ll
+++ b/test/llvmpasses/julia-licm.ll
@@ -152,7 +152,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2
 declare void @ijl_gc_queue_root({} addrspace(10)*) #3
 
 ; Function Attrs: allocsize(1)
-declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32, i8*) #1
+declare noalias nonnull {} addrspace(10)* @ijl_gc_small_alloc(i8*, i32, i32, i8*) #1
 
 ; Function Attrs: allocsize(1)
 declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #1
diff --git a/test/llvmpasses/late-lower-gc-addrspaces.ll b/test/llvmpasses/late-lower-gc-addrspaces.ll
index 702e44b2b0e28..9c041664a9682 100644
--- a/test/llvmpasses/late-lower-gc-addrspaces.ll
+++ b/test/llvmpasses/late-lower-gc-addrspaces.ll
@@ -1,6 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
 target triple = "amdgcn-amd-amdhsa"
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7-ni:10:11:12:13"
@@ -19,28 +19,28 @@ define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
 
-; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE:  %pgcstack = call ptr @julia.get_pgcstack()
+; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK:  %pgcstack = call ptr @julia.get_pgcstack()
     %pgcstack = call {}*** @julia.get_pgcstack()
 
-; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
-; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64
+; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
+; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 
-; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
+; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
 
-; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
-; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
+; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
+; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
 
 ; CHECK-NEXT: call void @boxed_simple
     call void @boxed_simple({} addrspace(10)* %aboxed,
                             {} addrspace(10)* %bboxed)
-; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
+; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
     ret void
 }
 
@@ -51,14 +51,14 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; OPAQUE-NEXT: ret ptr addrspace(10) %v
+; CHECK-NEXT: ret ptr addrspace(10) %v
     ret {} addrspace(10)* %v
 }
 
@@ -74,20 +74,20 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
+; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
+; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
+; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index 093cab1358141..d294847db8f9d 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -1,6 +1,6 @@
 ; This file is a part of Julia. License is MIT: https://julialang.org/license
 
-; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s --check-prefixes=CHECK,OPAQUE
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
 @tag = external addrspace(10) global {}, align 16
 
@@ -16,28 +16,28 @@ define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
 
-; OPAQUE: %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE:  %pgcstack = call ptr @julia.get_pgcstack()
+; CHECK: %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK:  %pgcstack = call ptr @julia.get_pgcstack()
     %pgcstack = call {}*** @julia.get_pgcstack()
 
-; OPAQUE-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
-; OPAQUE-NEXT: call ptr addrspace(10) @jl_box_int64
+; CHECK-NEXT: call void @julia.push_gc_frame(ptr %gcframe, i32 2)
+; CHECK-NEXT: call ptr addrspace(10) @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 
-; OPAQUE: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
+; CHECK: [[GEP0:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %aboxed, ptr [[GEP0]]
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
 ; CHECK-NEXT: %bboxed =
 ; Make sure the same gc slot isn't re-used
 
-; OPAQUE-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
-; OPAQUE: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
-; OPAQUE-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
+; CHECK-NOT: call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT0]])
+; CHECK: [[GEP1:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 [[GEPSLOT1:[0-9]+]])
+; CHECK-NEXT: store ptr addrspace(10) %bboxed, ptr [[GEP1]]
 
 ; CHECK-NEXT: call void @boxed_simple
     call void @boxed_simple({} addrspace(10)* %aboxed,
                             {} addrspace(10)* %bboxed)
-; OPAQUE-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
+; CHECK-NEXT: call void @julia.pop_gc_frame(ptr %gcframe)
     ret void
 }
 
@@ -48,14 +48,14 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; OPAQUE-NEXT: ret ptr addrspace(10) %v
+; CHECK-NEXT: ret ptr addrspace(10) %v
     ret {} addrspace(10)* %v
 }
 
@@ -71,20 +71,20 @@ top:
     %0 = bitcast {}*** %pgcstack to {}**
     %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
 
-; OPAQUE: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
-; OPAQUE-NEXT: [[ptls_field:%.*]] = getelementptr inbounds ptr, ptr %current_task, i64 16
-; OPAQUE-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
-; OPAQUE-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
-; OPAQUE-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
-; OPAQUE-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
+; CHECK: %current_task = getelementptr inbounds ptr, ptr %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds i8, ptr %current_task,
+; CHECK-NEXT: [[ptls_load:%.*]] = load ptr, ptr [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: %v = call noalias nonnull ptr addrspace(10) @julia.gc_alloc_bytes(ptr [[ptls_load]], [[SIZE_T:i.[0-9]+]] 8, i64 {{.*}} @tag {{.*}})
+; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds ptr addrspace(10), ptr addrspace(10) %v, i64 -1
+; CHECK-NEXT: store atomic ptr addrspace(10) @tag, ptr addrspace(10) [[V_HEADROOM]] unordered, align 8, !tbaa !4
     %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
-; OPAQUE-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
+; CHECK-NEXT: %v64 = bitcast ptr addrspace(10) %v to ptr addrspace(10)
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; OPAQUE-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
+; CHECK-NEXT: %loadedval = load i64, ptr addrspace(10) %v64, align 8, !range !7
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; OPAQUE-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
+; CHECK-NEXT: store i64 %loadedval, ptr addrspace(10) %v64, align 8, !noalias !8
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; OPAQUE-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
+; CHECK-NEXT: %lv2 = load i64, ptr addrspace(10) %v64, align 8, !tbaa !11, !range !7
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
@@ -162,13 +162,13 @@ define void @decayar([2 x {} addrspace(10)* addrspace(11)*] %ar) {
 
 ; CHECK-LABEL: @decayar
 
-; OPAQUE:  %gcframe = call ptr @julia.new_gc_frame(i32 2)
-; OPAQUE: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
-; OPAQUE:  store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8
-; OPAQUE:  [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
-; OPAQUE: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8
-; OPAQUE: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1)
-; OPAQUE: call void @julia.pop_gc_frame(ptr %gcframe)
+; CHECK:  %gcframe = call ptr @julia.new_gc_frame(i32 2)
+; CHECK: [[gc_slot_addr_:%.*]]1 = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 1)
+; CHECK:  store ptr addrspace(10) %l0, ptr [[gc_slot_addr_:%.*]], align 8
+; CHECK:  [[gc_slot_addr_:%.*]] = call ptr @julia.get_gc_frame_slot(ptr %gcframe, i32 0)
+; CHECK: store ptr addrspace(10) %l1, ptr [[gc_slot_addr_:%.*]], align 8
+; CHECK: %r = call i32 @callee_root(ptr addrspace(10) %l0, ptr addrspace(10) %l1)
+; CHECK: call void @julia.pop_gc_frame(ptr %gcframe)
 
 !0 = !{i64 0, i64 23}
 !1 = !{!1}
diff --git a/test/llvmpasses/names.jl b/test/llvmpasses/names.jl
index fe692d0fab787..1ab2204044804 100644
--- a/test/llvmpasses/names.jl
+++ b/test/llvmpasses/names.jl
@@ -135,7 +135,8 @@ emit(f2, Float64, Float64, Float64, Float64, Float64, Float64, Float64)
 
 # CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f5
 # CHECK-SAME: %"a::A"
-# CHECK: %"a::A.b_ptr.c_ptr.d
+# CHECK: %"a::A.d
+# COM: this text check relies on our LLVM code emission being relatively poor, which is not always the case
 emit(f5, A)
 
 # CHECK: define {{(swiftcc )?}}nonnull ptr @julia_f6
diff --git a/test/llvmpasses/parsing.ll b/test/llvmpasses/parsing.ll
index e75ba292f254a..e0a726176b225 100644
--- a/test/llvmpasses/parsing.ll
+++ b/test/llvmpasses/parsing.ll
@@ -1,6 +1,9 @@
 ; COM: NewPM-only test, tests for ability to parse Julia passes
 
 ; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes='module(CPUFeatures,RemoveNI,JuliaMultiVersioning,RemoveJuliaAddrspaces,LowerPTLSPass,function(DemoteFloat16,CombineMulAdd,LateLowerGCFrame,FinalLowerGC,AllocOpt,PropagateJuliaAddrspaces,LowerExcHandlers,GCInvariantVerifier,loop(LowerSIMDLoop,JuliaLICM),GCInvariantVerifier<strong>,GCInvariantVerifier<no-strong>),LowerPTLSPass<imaging>,LowerPTLSPass<no-imaging>,JuliaMultiVersioning<external>,JuliaMultiVersioning<no-external>)' -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;llvm_only>" -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_llvm_only>" -S %s -o /dev/null
+; RUN: opt --load-pass-plugin=libjulia-codegen%shlibext -passes="julia<level=3;no_enable_vector_pipeline>" -S %s -o /dev/null
 
 define void @test() {
     ret void
diff --git a/test/llvmpasses/pipeline-o0.jl b/test/llvmpasses/pipeline-o0.jl
index 1075d126c59ca..5dab675f2b547 100644
--- a/test/llvmpasses/pipeline-o0.jl
+++ b/test/llvmpasses/pipeline-o0.jl
@@ -10,7 +10,7 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
 # CHECK-LABEL: @julia_simple
 # CHECK-NOT: julia.get_pgcstack
 # CHECK-NOT: julia.gc_alloc_obj
-# CHECK: ijl_gc_pool_alloc
+# CHECK: ijl_gc_small_alloc
 # COM: we want something vaguely along the lines of asm load from the fs register -> allocate bytes
 function simple()
     Ref(0)
diff --git a/test/llvmpasses/pipeline-prints.ll b/test/llvmpasses/pipeline-prints.ll
index babd26c797a38..ecb70953026c2 100644
--- a/test/llvmpasses/pipeline-prints.ll
+++ b/test/llvmpasses/pipeline-prints.ll
@@ -298,12 +298,12 @@ attributes #2 = { inaccessiblemem_or_argmemonly }
 
 ; COM: Loop simplification makes the exit condition obvious
 ; AFTERLOOPSIMPLIFICATION: L35.lr.ph:
-; AFTERLOOPSIMPLIFICATION-NEXT: add nuw nsw
+; AFTERLOOPSIMPLIFICATION: add nuw nsw
 
 ; COM: Scalar optimization removes the previous add from the preheader
-; AFTERSCALAROPTIMIZATION: L35.preheader:
+; AFTERSCALAROPTIMIZATION: L35.lr.ph:
 ; AFTERSCALAROPTIMIZATION-NOT: add nuw nsw
-; AFTERSCALAROPTIMIZATION-NEXT: br label %L35
+; AFTERSCALAROPTIMIZATION: br label %L35
 
 ; COM: Vectorization does stuff
 ; AFTERVECTORIZATION: vector.body
diff --git a/test/loading.jl b/test/loading.jl
index 8310cb03c410b..1674a9f59a0c3 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -1,10 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-original_depot_path = copy(Base.DEPOT_PATH)
-
 using Test
 
 # Tests for @__LINE__ inside and outside of macros
+# NOTE: the __LINE__ numbers for these first couple tests are significant, so
+# adding any lines here will make those tests fail
 @test (@__LINE__) == 8
 
 macro macro_caller_lineno()
@@ -33,6 +33,9 @@ end
 @test @nested_LINE_expansion() == ((@__LINE__() - 4, @__LINE__() - 12), @__LINE__())
 @test @nested_LINE_expansion2() == ((@__LINE__() - 5, @__LINE__() - 9), @__LINE__())
 
+original_depot_path = copy(Base.DEPOT_PATH)
+include("precompile_utils.jl")
+
 loaded_files = String[]
 push!(Base.include_callbacks, (mod::Module, fn::String) -> push!(loaded_files, fn))
 include("test_sourcepath.jl")
@@ -167,7 +170,7 @@ end
 
             @test root.uuid == root_uuid
             @test this.uuid == this_uuid
-            @test that == nothing
+            @test that === nothing
 
             write(project_file, """
             name = "Root"
@@ -180,8 +183,8 @@ end
             that = Base.identify_package("That")
 
             @test root.uuid == proj_uuid
-            @test this == nothing
-            @test that == nothing
+            @test this === nothing
+            @test that === nothing
         finally
             copy!(LOAD_PATH, old_load_path)
         end
@@ -213,8 +216,8 @@ end
             that = Base.identify_package("That")
 
             @test root.uuid == root_uuid
-            @test this == nothing
-            @test that == nothing
+            @test this === nothing
+            @test that === nothing
 
             @test Base.get_uuid_name(project_file, this_uuid) == "This"
         finally
@@ -273,8 +276,8 @@ end
         @test joinpath(@__DIR__, normpath(path)) == locate_package(pkg)
         @test Base.compilecache_path(pkg, UInt64(0)) == Base.compilecache_path(pkg, UInt64(0))
     end
-    @test identify_package("Baz") == nothing
-    @test identify_package("Qux") == nothing
+    @test identify_package("Baz") === nothing
+    @test identify_package("Qux") === nothing
     @testset "equivalent package names" begin
          classes = [
             ["Foo"],
@@ -793,6 +796,17 @@ import .Foo28190.Libdl; import Libdl
     end
 end
 
+@testset "`::AbstractString` constraint on the path argument to `include`" begin
+    for m ∈ (NotPkgModule, evalfile("testhelpers/just_module.jl"))
+        let i = m.include
+            @test !applicable(i, (nothing,))
+            @test !applicable(i, (identity, nothing,))
+            @test !hasmethod(i, Tuple{Nothing})
+            @test !hasmethod(i, Tuple{Function,Nothing})
+        end
+    end
+end
+
 @testset "`Base.project_names` and friends" begin
     # Some functions in Pkg assumes that these tuples have the same length
     n = length(Base.project_names)
@@ -848,29 +862,13 @@ end
         proj = joinpath(tmp, "Project.toml")
         touch(proj)
         touch(joinpath(tmp, "Manifest-v1.5.toml"))
-        @test Base.project_file_manifest_path(proj) == nothing
+        @test Base.project_file_manifest_path(proj) === nothing
         touch(joinpath(tmp, "Manifest.toml"))
         man = basename(Base.project_file_manifest_path(proj))
         @test man == "Manifest.toml"
     end
 end
 
-@testset "error message loading pkg bad module name" begin
-    mktempdir() do tmp
-        old_loadpath = copy(LOAD_PATH)
-        try
-            push!(LOAD_PATH, tmp)
-            write(joinpath(tmp, "BadCase.jl"), "module badcase end")
-            @test_logs (:warn, r"The call to compilecache failed.*") match_mode=:any begin
-                @test_throws ErrorException("package `BadCase` did not define the expected module `BadCase`, \
-                    check for typos in package module name") (@eval using BadCase)
-            end
-        finally
-            copy!(LOAD_PATH, old_loadpath)
-        end
-    end
-end
-
 @testset "Preferences loading" begin
     mktempdir() do dir
         this_uuid = uuid4()
@@ -1034,6 +1032,16 @@ end
 end
 
 @testset "Extensions" begin
+    test_ext = """
+    function test_ext(parent::Module, ext::Symbol)
+        _ext = Base.get_extension(parent, ext)
+        _ext isa Module || error("expected extension \$ext to be loaded")
+        _pkgdir = pkgdir(_ext)
+        _pkgdir == pkgdir(parent) != nothing || error("unexpected extension \$ext pkgdir path: \$_pkgdir")
+        _pkgversion = pkgversion(_ext)
+        _pkgversion == pkgversion(parent) || error("unexpected extension \$ext version: \$_pkgversion")
+    end
+    """
     depot_path = mktempdir()
     try
         proj = joinpath(@__DIR__, "project", "Extensions", "HasDepWithExtensions.jl")
@@ -1044,6 +1052,7 @@ end
             cmd = """
             $load_distr
             begin
+                $ew $test_ext
                 $ew push!(empty!(DEPOT_PATH), $(repr(depot_path)))
                 using HasExtensions
                 $ew using HasExtensions
@@ -1051,6 +1060,7 @@ end
                 $ew HasExtensions.ext_loaded && error("ext_loaded set")
                 using HasDepWithExtensions
                 $ew using HasDepWithExtensions
+                $ew test_ext(HasExtensions, :Extension)
                 $ew Base.get_extension(HasExtensions, :Extension).extvar == 1 || error("extvar in Extension not set")
                 $ew HasExtensions.ext_loaded || error("ext_loaded not set")
                 $ew HasExtensions.ext_folder_loaded && error("ext_folder_loaded set")
@@ -1102,13 +1112,14 @@ end
 
         test_ext_proj = """
         begin
+            $test_ext
             using HasExtensions
             using ExtDep
-            Base.get_extension(HasExtensions, :Extension) isa Module || error("expected extension to load")
+            test_ext(HasExtensions, :Extension)
             using ExtDep2
-            Base.get_extension(HasExtensions, :ExtensionFolder) isa Module || error("expected extension to load")
+            test_ext(HasExtensions, :ExtensionFolder)
             using ExtDep3
-            Base.get_extension(HasExtensions, :ExtensionDep) isa Module || error("expected extension to load")
+            test_ext(HasExtensions, :ExtensionDep)
         end
         """
         for compile in (`--compiled-modules=no`, ``)
@@ -1147,6 +1158,19 @@ end
         finally
             copy!(LOAD_PATH, old_load_path)
         end
+
+        # Extension with cycles in dependencies
+        code = """
+        using CyclicExtensions
+        Base.get_extension(CyclicExtensions, :ExtA) isa Module || error("expected extension to load")
+        Base.get_extension(CyclicExtensions, :ExtB) isa Module || error("expected extension to load")
+        CyclicExtensions.greet()
+        """
+        proj = joinpath(@__DIR__, "project", "Extensions", "CyclicExtensions")
+        cmd =  `$(Base.julia_cmd()) --startup-file=no -e $code`
+        cmd = addenv(cmd, "JULIA_LOAD_PATH" => proj)
+        @test occursin("Hello Cycles!", String(read(cmd)))
+
     finally
         try
             rm(depot_path, force=true, recursive=true)
@@ -1255,96 +1279,6 @@ end
     @test success(`$(Base.julia_cmd()) --startup-file=no -e 'using Statistics'`)
 end
 
-@testset "checking srcpath modules" begin
-    p = Base.PkgId("Dummy")
-    fpath, _ = mktemp()
-    @testset "valid" begin
-        write(fpath, """
-        module Foo
-        using Bar
-        end
-        """)
-        @test Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        baremodule Foo
-        using Bar
-        end
-        """)
-        @test Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        \"\"\"
-        Foo
-        using Foo
-        \"\"\"
-        module Foo
-        using Bar
-        end
-        """)
-        @test Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        \"\"\" Foo \"\"\"
-        module Foo
-        using Bar
-        end
-        """)
-        @test Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        \"\"\"
-        Foo
-        \"\"\" module Foo
-        using Bar
-        end
-        """)
-        @test Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        @doc let x = 1
-            x
-        end module Foo
-        using Bar
-        end
-        """)
-        @test Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        # using foo
-        module Foo
-        using Bar
-        end
-        """)
-        @test Base.check_src_module_wrap(p, fpath)
-    end
-    @testset "invalid" begin
-        write(fpath, """
-        # module Foo
-        using Bar
-        # end
-        """)
-        @test_throws ErrorException Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        using Bar
-        module Foo
-        end
-        """)
-        @test_throws ErrorException Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        using Bar
-        """)
-        @test_throws ErrorException Base.check_src_module_wrap(p, fpath)
-
-        write(fpath, """
-        x = 1
-        """)
-        @test_throws ErrorException Base.check_src_module_wrap(p, fpath)
-    end
-end
-
 @testset "relocatable upgrades #51989" begin
     mktempdir() do depot
         # realpath is needed because Pkg is used for one of the precompile paths below, and Pkg calls realpath on the
@@ -1584,6 +1518,7 @@ end
 @testset "-m" begin
     rot13proj = joinpath(@__DIR__, "project", "Rot13")
     @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13 --project nowhere ABJURER`) == "--cebwrpg abjurer NOWHERE "
+    @test readchomp(`$(Base.julia_cmd()) --startup-file=no --project=$rot13proj -m Rot13.Rot26 --project nowhere ABJURER`) == "--project nowhere ABJURER "
 end
 
 @testset "workspace loading" begin
@@ -1671,3 +1606,32 @@ end
         copy!(LOAD_PATH, old_load_path)
     end
 end
+
+@testset "require_stdlib loading duplication" begin
+    depot_path = mktempdir()
+    oldBase64 = nothing
+    try
+        push!(empty!(DEPOT_PATH), depot_path)
+        Base64_key = Base.PkgId(Base.UUID("2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"), "Base64")
+        oldBase64 = Base.unreference_module(Base64_key)
+        cc = Base.compilecache(Base64_key)
+        @test Base.isprecompiled(Base64_key, cachepaths=String[cc[1]])
+        empty!(DEPOT_PATH)
+        Base.require_stdlib(Base64_key)
+        push!(DEPOT_PATH, depot_path)
+        append!(DEPOT_PATH, original_depot_path)
+        oldloaded = @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[])))
+        Base.require(Base64_key)
+        @test @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[]))) == oldloaded
+        Base.unreference_module(Base64_key)
+        empty!(DEPOT_PATH)
+        push!(DEPOT_PATH, depot_path)
+        Base.require(Base64_key)
+        @test @lock(Base.require_lock, length(get(Base.loaded_precompiles, Base64_key, Module[]))) == oldloaded + 1
+        Base.unreference_module(Base64_key)
+    finally
+        oldBase64 === nothing || Base.register_root_module(oldBase64)
+        copy!(DEPOT_PATH, original_depot_path)
+        rm(depot_path, force=true, recursive=true)
+    end
+end
diff --git a/test/math.jl b/test/math.jl
index c0a2d8bf8c9f8..5a9f3248e59f4 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -1498,6 +1498,28 @@ end
     n = Int64(1024 / log2(E))
     @test E^n == Inf
     @test E^float(n) == Inf
+
+    # #55633
+    struct Issue55633_1 <: Number end
+    struct Issue55633_3 <: Number end
+    struct Issue55633_9 <: Number end
+    Base.one(::Issue55633_3) = Issue55633_1()
+    Base.:(*)(::Issue55633_3, ::Issue55633_3) = Issue55633_9()
+    Base.promote_rule(::Type{Issue55633_1}, ::Type{Issue55633_3}) = Int
+    Base.promote_rule(::Type{Issue55633_3}, ::Type{Issue55633_9}) = Int
+    Base.promote_rule(::Type{Issue55633_1}, ::Type{Issue55633_9}) = Int
+    Base.promote_rule(::Type{Issue55633_1}, ::Type{Int}) = Int
+    Base.promote_rule(::Type{Issue55633_3}, ::Type{Int}) = Int
+    Base.promote_rule(::Type{Issue55633_9}, ::Type{Int}) = Int
+    Base.convert(::Type{Int}, ::Issue55633_1) = 1
+    Base.convert(::Type{Int}, ::Issue55633_3) = 3
+    Base.convert(::Type{Int}, ::Issue55633_9) = 9
+    for x ∈ (im, pi, Issue55633_3())
+        p = promote(one(x), x, x*x)
+        for y ∈ 0:2
+            @test all((t -> ===(t...)), zip(x^y, p[y + 1]))
+        end
+    end
 end
 
 # Test that sqrt behaves correctly and doesn't exhibit fp80 double rounding.
diff --git a/test/misc.jl b/test/misc.jl
index 10a25fa1c5ff6..e089395ce4557 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -132,7 +132,7 @@ end
 # Lockable{T, L<:AbstractLock}
 using Base: Lockable
 let
-    @test_broken Base.isexported(Base, :Lockable)
+    @test Base.isexported(Base, :Lockable)
     lockable = Lockable(Dict("foo" => "hello"), ReentrantLock())
     # note field access is non-public
     @test lockable.value["foo"] == "hello"
@@ -159,6 +159,16 @@ let
     @test @lock(lockable2, lockable2[]["foo"]) == "hello"
 end
 
+@testset "`show` for ReentrantLock" begin
+    l = ReentrantLock()
+    @test repr(l) == "ReentrantLock()"
+    @test repr("text/plain", l) == "ReentrantLock() (unlocked)"
+    @lock l begin
+        @test startswith(repr("text/plain", l), "ReentrantLock() (locked by current Task (")
+    end
+    @test repr("text/plain", l) == "ReentrantLock() (unlocked)"
+end
+
 for l in (Threads.SpinLock(), ReentrantLock())
     @test get_finalizers_inhibited() == 0
     @test lock(get_finalizers_inhibited, l) == 1
@@ -321,25 +331,43 @@ v11801, t11801 = @timed sin(1)
 @test names(@__MODULE__, all = true) == names_before_timing
 
 redirect_stdout(devnull) do # suppress time prints
+
 # Accepted @time argument formats
 @test @time true
 @test @time "message" true
+@test @time 1 true
 let msg = "message"
     @test @time msg true
 end
 let foo() = "message"
     @test @time foo() true
 end
+let foo() = 1
+    @test @time foo() true
+end
 
 # Accepted @timev argument formats
 @test @timev true
 @test @timev "message" true
+@test @timev 1 true
 let msg = "message"
     @test @timev msg true
 end
 let foo() = "message"
     @test @timev foo() true
 end
+let foo() = 1
+    @test @timev foo() true
+end
+
+# this is internal, but used for easy testing
+@test sprint(Base.time_print, 1e9) == "  1.000000 seconds"
+@test sprint(Base.time_print, 1e9, 111, 0, 222) == "  1.000000 seconds (222 allocations: 111 bytes)"
+@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222) == "  1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time)"
+@test sprint(Base.time_print, 1e9, 111, 0, 222, 333) == "  1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts)"
+@test sprint(Base.time_print, 1e9, 0, 0, 0, 333) == "  1.000000 seconds (333 lock conflicts)"
+@test sprint(Base.time_print, 1e9, 111, 0, 222, 333, 0.25e9) == "  1.000000 seconds (222 allocations: 111 bytes, 333 lock conflicts, 25.00% compilation time)"
+@test sprint(Base.time_print, 1e9, 111, 0.5e9, 222, 333, 0.25e9, 0.175e9) == "  1.000000 seconds (222 allocations: 111 bytes, 50.00% gc time, 333 lock conflicts, 25.00% compilation time: 70% of which was recompilation)"
 
 # @showtime
 @test @showtime true
@@ -1548,3 +1576,23 @@ end
 @testset "Base.Libc docstrings" begin
     @test isempty(Docs.undocumented_names(Libc))
 end
+
+@testset "Silenced missed transformations" begin
+    # Ensure the WarnMissedTransformationsPass is not on by default
+    src = """
+        @noinline iteration(i) = (@show(i); return nothing)
+        @eval function loop_unroll_full_fail(N)
+            for i in 1:N
+              iteration(i)
+              \$(Expr(:loopinfo, (Symbol("llvm.loop.unroll.full"), 1)))
+          end
+       end
+       loop_unroll_full_fail(3)
+    """
+    out_err = mktemp() do _, f
+        run(`$(Base.julia_cmd()) -e "$src"`, devnull, devnull, f)
+        seekstart(f)
+        read(f, String)
+    end
+    @test !occursin("loop not unrolled", out_err)
+end
diff --git a/test/mpfr.jl b/test/mpfr.jl
index 9a9698ba72c2c..c212bdfc92821 100644
--- a/test/mpfr.jl
+++ b/test/mpfr.jl
@@ -1088,3 +1088,12 @@ end
         clear_flags()
     end
 end
+
+@testset "BigFloatData truncation OOB read" begin
+    @testset "T: $T" for T ∈ (UInt8, UInt16, UInt32, UInt64, UInt128)
+        v = Base.MPFR.BigFloatData{T}(fill(typemax(T), 1 + Base.MPFR.offset_p_limbs))
+        @testset "bit_count: $bit_count" for bit_count ∈ (0:10:80)
+            @test Base.MPFR.truncated(UInt128, v, bit_count) isa Any
+        end
+    end
+end
diff --git a/test/namedtuple.jl b/test/namedtuple.jl
index 0487f96496309..b8dba5c06422e 100644
--- a/test/namedtuple.jl
+++ b/test/namedtuple.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using Base: delete
+
 @test_throws TypeError NamedTuple{1,Tuple{}}
 @test_throws TypeError NamedTuple{(),1}
 @test_throws TypeError NamedTuple{(:a,1),Tuple{Int}}
@@ -282,6 +284,11 @@ end
 abstr_nt_22194_3()
 @test Base.return_types(abstr_nt_22194_3, ()) == Any[Any]
 
+@test delete((a=1,), :a) == NamedTuple()
+@test delete((a=1, b=2), :a) == (b=2,)
+@test delete((a=1, b=2, c=3), :b) == (a=1, c=3)
+@test delete((a=1, b=2, c=3), :z) == (a=1, b=2, c=3)
+
 @test Base.structdiff((a=1, b=2), (b=3,)) == (a=1,)
 @test Base.structdiff((a=1, b=2, z=20), (b=3,)) == (a=1, z=20)
 @test Base.structdiff((a=1, b=2, z=20), (b=3, q=20, z=1)) == (a=1,)
diff --git a/test/numbers.jl b/test/numbers.jl
index 34e775f9b2eea..fc3dc2c06bb7c 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -1158,6 +1158,8 @@ end
 end
 
 @testset "Irrationals compared with Rationals and Floats" begin
+    @test pi != Float64(pi)
+    @test Float64(pi) != pi
     @test Float64(pi,RoundDown) < pi
     @test Float64(pi,RoundUp) > pi
     @test !(Float64(pi,RoundDown) > pi)
@@ -1176,6 +1178,7 @@ end
     @test nextfloat(big(pi)) > pi
     @test !(prevfloat(big(pi)) > pi)
     @test !(nextfloat(big(pi)) < pi)
+    @test big(typeof(pi)) == BigFloat
 
     @test 2646693125139304345//842468587426513207 < pi
     @test !(2646693125139304345//842468587426513207 > pi)
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index 9d6a8b08c0b1f..fb5855dfbaa0d 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -383,6 +383,18 @@ v2 = copy(v)
 @test v2[end-1] == 2
 @test v2[end] == 1
 
+# push!(v::AbstractVector, x...)
+v2 = copy(v)
+@test @invoke(push!(v2::AbstractVector, 3)) === v2
+@test v2[axes(v,1)] == v
+@test v2[end] == 3
+@test v2[begin] == v[begin] == v[-2]
+v2 = copy(v)
+@test @invoke(push!(v2::AbstractVector, 5, 6)) == v2
+@test v2[axes(v,1)] == v
+@test v2[end-1] == 5
+@test v2[end] == 6
+
 # append! from array
 v2 = copy(v)
 @test append!(v2, [2, 1]) === v2
@@ -399,6 +411,23 @@ v2 = copy(v)
 @test v2[axes(v, 1)] == v
 @test v2[lastindex(v)+1:end] == [2, 1]
 
+# append!(::AbstractVector, ...)
+# append! from array
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, [2, 1]::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+# append! from HasLength iterator
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1])::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+# append! from SizeUnknown iterator
+v2 = copy(v)
+@test @invoke(append!(v2::AbstractVector, (v for v in [2, 1] if true)::Any)) === v2
+@test v2[axes(v, 1)] == v
+@test v2[lastindex(v)+1:end] == [2, 1]
+
 # other functions
 v = OffsetArray(v0, (-3,))
 @test lastindex(v) == 1
@@ -865,7 +894,23 @@ end
     @test CartesianIndices(A) == CartesianIndices(B)
 end
 
+@testset "overflowing show" begin
+    A = OffsetArray(repeat([1], 1), typemax(Int)-1)
+    b = IOBuffer(maxsize=10)
+    show(b, A)
+    @test String(take!(b)) == "[1]"
+    show(b, (A, A))
+    @test String(take!(b)) == "([1], [1])"
+end
+
 @testset "indexing views (#53249)" begin
     v = view([1,2,3,4], :)
     @test v[Base.IdentityUnitRange(2:3)] == OffsetArray(2:3, 2:3)
 end
+
+@testset "mapreduce with OffsetRanges" begin
+    r = 5:100
+    a = OffsetArray(r, 2)
+    b = sum(a, dims=1)
+    @test b[begin] == sum(r)
+end
diff --git a/test/path.jl b/test/path.jl
index 2f4f2d0983a58..4c2c7034577d5 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -311,6 +311,19 @@
         test_relpath()
     end
 
+    @testset "uripath" begin
+        host = if Sys.iswindows() "" else gethostname() end
+        sysdrive, uridrive = if Sys.iswindows() "C:\\", "C:/" else "/", "" end
+        @test Base.Filesystem.uripath("$(sysdrive)some$(sep)file.txt") == "file://$host/$(uridrive)some/file.txt"
+        @test Base.Filesystem.uripath("$(sysdrive)another$(sep)$(sep)folder$(sep)file.md") == "file://$host/$(uridrive)another/folder/file.md"
+        @test Base.Filesystem.uripath("$(sysdrive)some file with ^odd% chars") == "file://$host/$(uridrive)some%20file%20with%20%5Eodd%25%20chars"
+        @test Base.Filesystem.uripath("$(sysdrive)weird chars like @#&()[]{}") == "file://$host/$(uridrive)weird%20chars%20like%20%40%23%26%28%29%5B%5D%7B%7D"
+        @test Base.Filesystem.uripath("$sysdrive") == "file://$host/$uridrive"
+        @test Base.Filesystem.uripath(".") == Base.Filesystem.uripath(pwd())
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)Δεδομένα") == "file://$host/$(uridrive)unicode/%CE%94%CE%B5%CE%B4%CE%BF%CE%BC%CE%AD%CE%BD%CE%B1"
+        @test Base.Filesystem.uripath("$(sysdrive)unicode$(sep)🧮🐛🔨") == "file://$host/$(uridrive)unicode/%F0%9F%A7%AE%F0%9F%90%9B%F0%9F%94%A8"
+    end
+
     if Sys.iswindows()
         @testset "issue #23646" begin
             @test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b"
diff --git a/test/precompile.jl b/test/precompile.jl
index 3241ee8b25a35..7a6e41061f9b1 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -597,6 +597,10 @@ precompile_test_harness(false) do dir
     @test Base.invokelatest(Baz.baz) === 1
     @test Baz === UseBaz.Baz
 
+    # should not throw if the cachefile does not exist
+    @test !isfile("DoesNotExist.ji")
+    @test Base.stale_cachefile("", "DoesNotExist.ji") === true
+
     # Issue #12720
     FooBar1_file = joinpath(dir, "FooBar1.jl")
     write(FooBar1_file,
@@ -1921,7 +1925,7 @@ precompile_test_harness("Issue #50538") do load_path
             ex isa ErrorException || rethrow()
             ex
         end
-        global undefglobal
+        global undefglobal::Any
         end
         """)
     ji, ofile = Base.compilecache(Base.PkgId("I50538"))
@@ -2008,6 +2012,13 @@ precompile_test_harness("Generated Opaque") do load_path
                 Expr(:opaque_closure_method, nothing, 0, false, lno, ci))
         end
         @assert oc_re_generated_no_partial()() === 1
+        @generated function oc_re_generated_no_partial_macro()
+            AT = nothing
+            RT = nothing
+            allow_partial = false # makes this legal to generate during pre-compile
+            return Expr(:opaque_closure, AT, RT, RT, allow_partial, :(()->const_int_barrier()))
+        end
+        @assert oc_re_generated_no_partial_macro()() === 1
         end
         """)
     Base.compilecache(Base.PkgId("GeneratedOpaque"))
@@ -2082,4 +2093,78 @@ precompile_test_harness("Binding Unique") do load_path
     @test UniqueBinding2.thebinding2 === ccall(:jl_get_module_binding, Ref{Core.Binding}, (Any, Any, Cint), UniqueBinding2, :thebinding, true)
 end
 
+precompile_test_harness("Detecting importing outside of a package module") do load_path
+    io = IOBuffer()
+    write(joinpath(load_path, "ImportBeforeMod.jl"),
+    """
+    import Printf
+    module ImportBeforeMod
+    end #module
+    """)
+    @test_throws r"Failed to precompile ImportBeforeMod" Base.compilecache(Base.identify_package("ImportBeforeMod"), io, io)
+    @test occursin(
+        "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.",
+        String(take!(io)))
+
+
+    write(joinpath(load_path, "HarmlessComments.jl"),
+    """
+    # import Printf
+    #=
+    import Printf
+    =#
+    module HarmlessComments
+    end #module
+    # import Printf
+    #=
+    import Printf
+    =#
+    """)
+    Base.compilecache(Base.identify_package("HarmlessComments"))
+
+
+    write(joinpath(load_path, "ImportAfterMod.jl"), """
+    module ImportAfterMod
+    end #module
+    import Printf
+    """)
+    @test_throws r"Failed to precompile ImportAfterMod" Base.compilecache(Base.identify_package("ImportAfterMod"), io, io)
+    @test occursin(
+        "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.",
+        String(take!(io)))
+end
+
+precompile_test_harness("No package module") do load_path
+    io = IOBuffer()
+    write(joinpath(load_path, "NoModule.jl"),
+    """
+    1
+    """)
+    @test_throws r"Failed to precompile NoModule" Base.compilecache(Base.identify_package("NoModule"), io, io)
+    @test occursin(
+        "NoModule [top-level] did not define the expected module `NoModule`, check for typos in package module name",
+        String(take!(io)))
+
+
+    write(joinpath(load_path, "WrongModuleName.jl"),
+    """
+    module DifferentName
+    x = 1
+    end #module
+    """)
+    @test_throws r"Failed to precompile WrongModuleName" Base.compilecache(Base.identify_package("WrongModuleName"), io, io)
+    @test occursin(
+        "WrongModuleName [top-level] did not define the expected module `WrongModuleName`, check for typos in package module name",
+        String(take!(io)))
+
+
+    write(joinpath(load_path, "NoModuleWithImport.jl"), """
+    import Printf
+    """)
+    @test_throws r"Failed to precompile NoModuleWithImport" Base.compilecache(Base.identify_package("NoModuleWithImport"), io, io)
+    @test occursin(
+        "`using/import Printf` outside of a Module detected. Importing a package outside of a module is not allowed during package precompilation.",
+        String(take!(io)))
+end
+
 finish_precompile_test!()
diff --git a/test/project/Extensions/CyclicExtensions/Manifest.toml b/test/project/Extensions/CyclicExtensions/Manifest.toml
new file mode 100644
index 0000000000000..a506825cf7995
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/Manifest.toml
@@ -0,0 +1,21 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.10.4"
+manifest_format = "2.0"
+project_hash = "ec25ff8df3a5e2212a173c3de2c7d716cc47cd36"
+
+[[deps.ExtDep]]
+deps = ["SomePackage"]
+path = "../ExtDep.jl"
+uuid = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+version = "0.1.0"
+
+[[deps.ExtDep2]]
+path = "../ExtDep2"
+uuid = "55982ee5-2ad5-4c40-8cfe-5e9e1b01500d"
+version = "0.1.0"
+
+[[deps.SomePackage]]
+path = "../SomePackage"
+uuid = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+version = "0.1.0"
diff --git a/test/project/Extensions/CyclicExtensions/Project.toml b/test/project/Extensions/CyclicExtensions/Project.toml
new file mode 100644
index 0000000000000..08d539dcc40ae
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/Project.toml
@@ -0,0 +1,13 @@
+name = "CyclicExtensions"
+uuid = "17d4f0df-b55c-4714-ac4b-55fa23f7355c"
+version = "0.1.0"
+
+[deps]
+ExtDep = "fa069be4-f60b-4d4c-8b95-f8008775090c"
+
+[weakdeps]
+SomePackage = "678608ae-7bb3-42c7-98b1-82102067a3d8"
+
+[extensions]
+ExtA = ["SomePackage"]
+ExtB = ["SomePackage"]
diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtA.jl b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl
new file mode 100644
index 0000000000000..fa0c0961633cb
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/ext/ExtA.jl
@@ -0,0 +1,6 @@
+module ExtA
+
+using CyclicExtensions
+using SomePackage
+
+end
diff --git a/test/project/Extensions/CyclicExtensions/ext/ExtB.jl b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl
new file mode 100644
index 0000000000000..8f6da556d39b8
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/ext/ExtB.jl
@@ -0,0 +1,6 @@
+module ExtB
+
+using CyclicExtensions
+using SomePackage
+
+end
diff --git a/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl
new file mode 100644
index 0000000000000..f1c2ec2077562
--- /dev/null
+++ b/test/project/Extensions/CyclicExtensions/src/CyclicExtensions.jl
@@ -0,0 +1,7 @@
+module CyclicExtensions
+
+using ExtDep
+
+greet() = print("Hello Cycles!")
+
+end # module CyclicExtensions
diff --git a/test/project/Rot13/src/Rot13.jl b/test/project/Rot13/src/Rot13.jl
index 1d19cbbe6df91..66f077812d878 100644
--- a/test/project/Rot13/src/Rot13.jl
+++ b/test/project/Rot13/src/Rot13.jl
@@ -12,4 +12,17 @@ function (@main)(args)
     return 0
 end
 
+module Rot26 # LOL
+
+import ..rot13
+
+rot26(str::AbstractString) = map(rot13 ∘ rot13, str)
+
+function (@main)(args)
+    foreach(arg -> print(rot26(arg), " "), args)
+    return 0
+end
+
+end
+
 end # module Rot13
diff --git a/test/ranges.jl b/test/ranges.jl
index d789871c6d049..86cd1c3f2345c 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -292,15 +292,10 @@ end
 
     rand_twiceprecision(::Type{T}) where {T<:Number} = Base.TwicePrecision{T}(rand(widen(T)))
 
-    rand_twiceprecision_is_ok(::Type{T}) where {T<:Number} = @test !iszero(rand_twiceprecision(T).lo)
-
     # For this test the `BigFloat` mantissa needs to be just a bit
     # larger than the `Float64` mantissa
     setprecision(BigFloat, 70) do
         n = 10
-        @testset "rand twiceprecision is ok" for T ∈ (Float32, Float64), i ∈ 1:n
-            rand_twiceprecision_is_ok(T)
-        end
         @testset "twiceprecision roundtrip is not lossy 1" for i ∈ 1:n
             twiceprecision_roundtrip_is_not_lossy(Float64, rand(BigFloat))
         end
@@ -437,17 +432,17 @@ end
     @testset "findfirst" begin
         @test findfirst(==(1), Base.IdentityUnitRange(-1:1)) == 1
         @test findfirst(isequal(3), Base.OneTo(10)) == 3
-        @test findfirst(==(0), Base.OneTo(10)) == nothing
-        @test findfirst(==(11), Base.OneTo(10)) == nothing
+        @test findfirst(==(0), Base.OneTo(10)) === nothing
+        @test findfirst(==(11), Base.OneTo(10)) === nothing
         @test findfirst(==(4), Int16(3):Int16(7)) === Int(2)
-        @test findfirst(==(2), Int16(3):Int16(7)) == nothing
-        @test findfirst(isequal(8), 3:7) == nothing
+        @test findfirst(==(2), Int16(3):Int16(7)) === nothing
+        @test findfirst(isequal(8), 3:7) === nothing
         @test findfirst(isequal(7), 1:2:10) == 4
         @test findfirst(==(7), 1:2:10) == 4
-        @test findfirst(==(10), 1:2:10) == nothing
-        @test findfirst(==(11), 1:2:10) == nothing
+        @test findfirst(==(10), 1:2:10) === nothing
+        @test findfirst(==(11), 1:2:10) === nothing
         @test findfirst(==(-7), 1:-1:-10) == 9
-        @test findfirst(==(2),1:-1:2) == nothing
+        @test findfirst(==(2),1:-1:2) === nothing
     end
     @testset "reverse" begin
         @test reverse(reverse(1:10)) == 1:10
diff --git a/test/rational.jl b/test/rational.jl
index c6f81372de0b9..90b5414a6fe89 100644
--- a/test/rational.jl
+++ b/test/rational.jl
@@ -702,6 +702,22 @@ end
     end
 end
 
+@testset "gcdx for 1 and 3+ arguments" begin
+    # one-argument
+    @test gcdx(7) == (7, 1)
+    @test gcdx(-7) == (7, -1)
+    @test gcdx(1//4) == (1//4, 1)
+
+    # 3+ arguments
+    @test gcdx(2//3) == gcdx(2//3) == (2//3, 1)
+    @test gcdx(15, 12, 20) == (1, 7, -7, -1)
+    @test gcdx(60//4, 60//5, 60//3) == (1//1, 7, -7, -1)
+    abcd = (105, 1638, 2145, 3185)
+    d, uvwp... = gcdx(abcd...)
+    @test d == sum(abcd .* uvwp) # u*a + v*b + w*c + p*d == gcd(a, b, c, d)
+    @test (@inferred gcdx(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) isa NTuple{11, Int}
+end
+
 @testset "Binary operations with Integer" begin
     @test 1//2 - 1 == -1//2
     @test -1//2 + 1 == 1//2
@@ -801,3 +817,20 @@ end
     @test rationalize(Int64, nextfloat(0.1) * im; tol=0) == precise_next * im
     @test rationalize(0.1im; tol=eps(0.1)) == rationalize(0.1im)
 end
+
+@testset "complex numerator, denominator" begin
+    z = complex(3*3, 2*3*5)
+    @test z === numerator(z) === numerator(z // 2) === numerator(z // 5)
+    @test complex(3, 2*5) === numerator(z // 3)
+    @test isone(denominator(z))
+    @test 2 === denominator(z // 2)
+    @test 1 === denominator(z // 3)
+    @test 5 === denominator(z // 5)
+    for den ∈ 1:10
+        q = z // den
+        @test q === (numerator(q)//denominator(q))
+    end
+    @testset "do not overflow silently" begin
+        @test_throws OverflowError numerator(Int8(1)//Int8(31) + Int8(8)im//Int8(3))
+    end
+end
diff --git a/test/reducedim.jl b/test/reducedim.jl
index 8f629fa83f28d..6a6f20214058c 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -587,6 +587,30 @@ end
     @test B[argmin(B, dims=[2, 3])] == @inferred(minimum(B, dims=[2, 3]))
 end
 
+@testset "careful with @inbounds" begin
+    Base.@propagate_inbounds f(x) = x == 2 ? x[-10000] : x
+    Base.@propagate_inbounds op(x,y) = x[-10000] + y[-10000]
+    for (arr, dims) in (([1,1,2], 1), ([1 1 2], 2), ([ones(Int,256);2], 1))
+        @test_throws BoundsError mapreduce(f, +, arr)
+        @test_throws BoundsError mapreduce(f, +, arr; dims)
+        @test_throws BoundsError mapreduce(f, +, arr; dims, init=0)
+        @test_throws BoundsError mapreduce(identity, op, arr)
+        try
+            #=@test_throws BoundsError=# mapreduce(identity, op, arr; dims)
+        catch ex
+            @test_broken ex isa BoundsError
+        end
+        @test_throws BoundsError mapreduce(identity, op, arr; dims, init=0)
+
+        @test_throws BoundsError findmin(f, arr)
+        @test_throws BoundsError findmin(f, arr; dims)
+
+        @test_throws BoundsError mapreduce(f, max, arr)
+        @test_throws BoundsError mapreduce(f, max, arr; dims)
+        @test_throws BoundsError mapreduce(f, max, arr; dims, init=0)
+    end
+end
+
 @testset "in-place reductions with mismatched dimensionalities" begin
     B = reshape(1:24, 4, 3, 2)
     for R in (fill(0, 4), fill(0, 4, 1), fill(0, 4, 1, 1))
diff --git a/test/reflection.jl b/test/reflection.jl
index 895da80e03c75..634390e0680d1 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -686,7 +686,7 @@ let
     @test @inferred wrapperT(ReflectionExample{T, Int64} where T) == ReflectionExample
     @test @inferred wrapperT(ReflectionExample) == ReflectionExample
     @test @inferred wrapperT(Union{ReflectionExample{Union{},1},ReflectionExample{Float64,1}}) == ReflectionExample
-    @test_throws(ErrorException("typename does not apply to unions whose components have different typenames"),
+    @test_throws(Core.TypeNameError(Union{Int, Float64}),
                  Base.typename(Union{Int, Float64}))
 end
 
@@ -1296,3 +1296,5 @@ end
 
 @test Base.infer_return_type(code_lowered, (Any,)) == Vector{Core.CodeInfo}
 @test Base.infer_return_type(code_lowered, (Any,Any)) == Vector{Core.CodeInfo}
+
+@test methods(Union{}) == Any[m.method for m in Base._methods_by_ftype(Tuple{Core.TypeofBottom, Vararg}, 1, Base.get_world_counter())] # issue #55187
diff --git a/test/regex.jl b/test/regex.jl
index a1d0b1b0ed69a..51802125a3467 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -213,7 +213,7 @@
 
         r = r"" * raw"a\Eb|c"
         @test match(r, raw"a\Eb|c").match == raw"a\Eb|c"
-        @test match(r, raw"c") == nothing
+        @test match(r, raw"c") === nothing
 
         # error for really incompatible options
         @test_throws ArgumentError r"a" * Regex("b", Base.DEFAULT_COMPILER_OPTS & ~Base.PCRE.UCP, Base.DEFAULT_MATCH_OPTS)
diff --git a/test/relocatedepot.jl b/test/relocatedepot.jl
index 039d422c35e25..2ef6dec90dbc1 100644
--- a/test/relocatedepot.jl
+++ b/test/relocatedepot.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 
 
@@ -26,16 +28,38 @@ end
 
 if !test_relocated_depot
 
-    @testset "insert @depot tag in path" begin
+    @testset "edge cases when inserting @depot tag in path" begin
 
+        # insert @depot only once for first match
         test_harness() do
             mktempdir() do dir
                 pushfirst!(DEPOT_PATH, dir)
-                path = dir*dir
-                @test Base.replace_depot_path(path) == "@depot"*dir
+                if Sys.iswindows()
+                    # dirs start with a drive letter instead of a path separator
+                    path = dir*Base.Filesystem.pathsep()*dir
+                    @test Base.replace_depot_path(path) == "@depot"*Base.Filesystem.pathsep()*dir
+                else
+                    path = dir*dir
+                    @test Base.replace_depot_path(path) == "@depot"*dir
+                end
+            end
+
+            # 55340
+            empty!(DEPOT_PATH)
+            mktempdir() do dir
+                jlrc = joinpath(dir, "julia-rc2")
+                jl   = joinpath(dir, "julia")
+                mkdir(jl)
+                push!(DEPOT_PATH, jl)
+                @test Base.replace_depot_path(jl) == "@depot"
+                @test Base.replace_depot_path(string(jl,Base.Filesystem.pathsep())) ==
+                            string("@depot",Base.Filesystem.pathsep())
+                @test Base.replace_depot_path(jlrc) != "@depot-rc2"
+                @test Base.replace_depot_path(jlrc) == jlrc
             end
         end
 
+        # deal with and without trailing path separators
         test_harness() do
             mktempdir() do dir
                 pushfirst!(DEPOT_PATH, dir)
@@ -43,9 +67,9 @@ if !test_relocated_depot
                 if isdirpath(DEPOT_PATH[1])
                     DEPOT_PATH[1] = dirname(DEPOT_PATH[1]) # strip trailing pathsep
                 end
-                tag = joinpath("@depot", "") # append a pathsep
+                tag = string("@depot", Base.Filesystem.pathsep())
                 @test startswith(Base.replace_depot_path(path), tag)
-                DEPOT_PATH[1] = joinpath(DEPOT_PATH[1], "") # append a pathsep
+                DEPOT_PATH[1] = string(DEPOT_PATH[1], Base.Filesystem.pathsep())
                 @test startswith(Base.replace_depot_path(path), tag)
                 popfirst!(DEPOT_PATH)
                 @test !startswith(Base.replace_depot_path(path), tag)
diff --git a/test/rounding.jl b/test/rounding.jl
index 76b15ec1d9118..6fad6f62e8dfe 100644
--- a/test/rounding.jl
+++ b/test/rounding.jl
@@ -470,3 +470,28 @@ end
         @test prevfloat(f) < i
     end
 end
+
+@testset "π to `BigFloat` with `setrounding`" begin
+    function irrational_to_big_float(c::AbstractIrrational)
+        BigFloat(c)
+    end
+
+    function irrational_to_big_float_with_rounding_mode(c::AbstractIrrational, rm::RoundingMode)
+        f = () -> irrational_to_big_float(c)
+        setrounding(f, BigFloat, rm)
+    end
+
+    function irrational_to_big_float_with_rounding_mode_and_precision(c::AbstractIrrational, rm::RoundingMode, prec::Int)
+        f = () -> irrational_to_big_float_with_rounding_mode(c, rm)
+        setprecision(f, BigFloat, prec)
+    end
+
+    for c ∈ (π, MathConstants.γ, MathConstants.catalan)
+        for p ∈ 1:40
+            @test (
+                irrational_to_big_float_with_rounding_mode_and_precision(c, RoundDown, p) < c <
+                irrational_to_big_float_with_rounding_mode_and_precision(c, RoundUp, p)
+            )
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index c46472ac93fa8..e48e896f4069e 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -438,9 +438,9 @@ cd(@__DIR__) do
     # o_ts.verbose = true # set to true to show all timings when successful
     Test.print_test_results(o_ts, 1)
     if !o_ts.anynonpass
-        println("    \033[32;1mSUCCESS\033[0m")
+        printstyled("    SUCCESS\n"; bold=true, color=:green)
     else
-        println("    \033[31;1mFAILURE\033[0m\n")
+        printstyled("    FAILURE\n\n"; bold=true, color=:red)
         skipped > 0 &&
             println("$skipped test", skipped > 1 ? "s were" : " was", " skipped due to failure.")
         println("The global RNG seed was 0x$(string(seed, base = 16)).\n")
diff --git a/test/scopedvalues.jl b/test/scopedvalues.jl
index 61b10c557c455..2c2f4a510c1c9 100644
--- a/test/scopedvalues.jl
+++ b/test/scopedvalues.jl
@@ -138,6 +138,12 @@ end
         @test sval[] == 1
         @test sval_float[] == 1.0
     end
+    @with sval=>2 sval_float=>2.0 begin
+        @with begin
+            @test sval[] == 2
+            @test sval_float[] == 2.0
+        end
+    end
 end
 
 @testset "isassigned" begin
diff --git a/test/secretbuffer.jl b/test/secretbuffer.jl
index 29e28ded8da72..703552570745c 100644
--- a/test/secretbuffer.jl
+++ b/test/secretbuffer.jl
@@ -1,5 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :ChallengePrompts) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ChallengePrompts.jl"))
+using .Main.ChallengePrompts: challenge_prompt
+
 using Base: SecretBuffer, SecretBuffer!, shred!, isshredded
 using Test, Random
 
@@ -170,4 +174,29 @@ using Test, Random
         @test read(s5) == read(s6) == codeunits(str)
         shred!(s5); shred!(s6)
     end
+
+    if !Sys.iswindows()
+        @testset "getpass" begin
+            v1, s1 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I"); (read(s), Base.shred!(s))), ["LPAwVZM8D4I: " => "too many secrets\n"])
+            s2 = SecretBuffer("too many secrets")
+            @test s1 isa SecretBuffer
+            @test isshredded(s1)
+            @test v1 == read(s2) == codeunits("too many secrets")
+            shred!(s1); shred!(s2)
+
+            v3, s3 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=false); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> " => "frperg\n"])
+            s4 = SecretBuffer("frperg")
+            @test s3 isa SecretBuffer
+            @test isshredded(s3)
+            @test v3 == read(s4) == codeunits("frperg")
+            shred!(s3); shred!(s4)
+
+            v5, s5 = challenge_prompt(:(s=Base.getpass("LPAwVZM8D4I> ", with_suffix=true); (read(s), Base.shred!(s))), ["LPAwVZM8D4I> : " => "frperg\n"])
+            s6 = SecretBuffer("frperg")
+            @test s5 isa SecretBuffer
+            @test isshredded(s5)
+            @test v5 == read(s6) == codeunits("frperg")
+            shred!(s5); shred!(s6)
+        end
+    end
 end
diff --git a/test/show.jl b/test/show.jl
index 63663152d9d91..d9c3585b7c1df 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -755,6 +755,69 @@ end
 
 @test startswith(sprint(show, typeof(x->x), context = :module=>@__MODULE__), "var\"")
 
+# PR 53719
+module M53719
+    f = x -> x + 1
+    function foo(x)
+        function bar(y)
+            function baz(z)
+                return x + y + z
+            end
+            return baz
+        end
+        return bar
+    end
+    function foo2(x)
+        function bar2(y)
+            return z -> x + y + z
+        end
+        return bar2
+    end
+    lambda1 = (x)->begin
+        function foo(y)
+            return x + y
+        end
+        return foo
+    end
+    lambda2 = (x)->begin
+        y -> x + y
+    end
+end
+
+@testset "PR 53719 function names" begin
+    # M53719.f should be printed as var"#[0-9]+"
+    @test occursin(r"var\"#[0-9]+", sprint(show, M53719.f, context = :module=>M53719))
+    # M53719.foo(1) should be printed as var"#bar"
+    @test occursin(r"var\"#bar", sprint(show, M53719.foo(1), context = :module=>M53719))
+    # M53719.foo(1)(2) should be printed as var"#baz"
+    @test occursin(r"var\"#baz", sprint(show, M53719.foo(1)(2), context = :module=>M53719))
+    # M53719.foo2(1) should be printed as var"#bar2"
+    @test occursin(r"var\"#bar2", sprint(show, M53719.foo2(1), context = :module=>M53719))
+    # M53719.foo2(1)(2) should be printed as var"#foo2##[0-9]+"
+    @test occursin(r"var\"#foo2##[0-9]+", sprint(show, M53719.foo2(1)(2), context = :module=>M53719))
+    # M53719.lambda1(1) should be printed as var"#foo"
+    @test occursin(r"var\"#foo", sprint(show, M53719.lambda1(1), context = :module=>M53719))
+    # M53719.lambda2(1) should be printed as var"#[0-9]+"
+    @test occursin(r"var\"#[0-9]+", sprint(show, M53719.lambda2(1), context = :module=>M53719))
+end
+
+@testset "PR 53719 function types" begin
+    # typeof(M53719.f) should be printed as var"#[0-9]+#[0-9]+"
+    @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.f), context = :module=>M53719))
+    #typeof(M53719.foo(1)) should be printed as var"#bar#foo##[0-9]+"
+    @test occursin(r"var\"#bar#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)), context = :module=>M53719))
+    #typeof(M53719.foo(1)(2)) should be printed as var"#baz#foo##[0-9]+"
+    @test occursin(r"var\"#baz#foo##[0-9]+", sprint(show, typeof(M53719.foo(1)(2)), context = :module=>M53719))
+    #typeof(M53719.foo2(1)) should be printed as var"#bar2#foo2##[0-9]+"
+    @test occursin(r"var\"#bar2#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)), context = :module=>M53719))
+    #typeof(M53719.foo2(1)(2)) should be printed as var"#foo2##[0-9]+#foo2##[0-9]+"
+    @test occursin(r"var\"#foo2##[0-9]+#foo2##[0-9]+", sprint(show, typeof(M53719.foo2(1)(2)), context = :module=>M53719))
+    #typeof(M53719.lambda1(1)) should be printed as var"#foo#[0-9]+"
+    @test occursin(r"var\"#foo#[0-9]+", sprint(show, typeof(M53719.lambda1(1)), context = :module=>M53719))
+    #typeof(M53719.lambda2(1)) should be printed as var"#[0-9]+#[0-9]+"
+    @test occursin(r"var\"#[0-9]+#[0-9]+", sprint(show, typeof(M53719.lambda2(1)), context = :module=>M53719))
+end
+
 #test methodshow.jl functions
 @test Base.inbase(Base)
 @test !Base.inbase(LinearAlgebra)
@@ -2705,3 +2768,8 @@ let topmi = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ());
     topmi.def = Main
     @test contains(repr(topmi), "Toplevel MethodInstance")
 end
+
+@testset "show(<do-block expr>) no trailing whitespace" begin
+    do_expr1 = :(foo() do; bar(); end)
+    @test !contains(sprint(show, do_expr1), " \n")
+end
diff --git a/test/some.jl b/test/some.jl
index 59ccd05be96bf..89f699d8306c3 100644
--- a/test/some.jl
+++ b/test/some.jl
@@ -44,8 +44,8 @@
 
 ##  == and isequal nothing
 
-@test Some(1) != nothing
-@test Some(nothing) != nothing
+@test Some(1) !== nothing
+@test Some(nothing) !== nothing
 @test !isequal(Some(1), nothing)
 @test !isequal(Some(nothing), nothing)
 
diff --git a/test/spawn.jl b/test/spawn.jl
index 831eac493d4aa..c1802ba1f74da 100644
--- a/test/spawn.jl
+++ b/test/spawn.jl
@@ -573,7 +573,7 @@ end
 @test Cmd(`foo`, env=["A=true"]).env      == ["A=true"]
 @test Cmd(`foo`, env=("A"=>true,)).env    == ["A=true"]
 @test Cmd(`foo`, env=["A"=>true]).env     == ["A=true"]
-@test Cmd(`foo`, env=nothing).env         == nothing
+@test Cmd(`foo`, env=nothing).env         === nothing
 
 # test for interpolation of Cmd
 let c = setenv(`x`, "A"=>true)
diff --git a/test/specificity.jl b/test/specificity.jl
index 816a59f63e193..13688036c2047 100644
--- a/test/specificity.jl
+++ b/test/specificity.jl
@@ -316,3 +316,14 @@ end
 @test args_morespecific(Tuple{typeof(Union{}), Any}, Tuple{Any, Type{Union{}}})
 @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any}, Tuple{Type{Union{}}, Any, Type{Union{}}})
 @test args_morespecific(Tuple{Type{Union{}}, Type{Union{}}, Any, Type{Union{}}}, Tuple{Type{Union{}}, Any, Type{Union{}}, Type{Union{}}})
+
+# requires assertions enabled
+let root = NTuple
+    N = root.var
+    T = root.body.var
+    x1 = root.body.body
+    x2 = Dict{T,Tuple{N}}
+    A = UnionAll(N, UnionAll(T, Tuple{Union{x1, x2}}))
+    B = Tuple{Union{UnionAll(N, UnionAll(T, x1)), UnionAll(N, UnionAll(T, x2))}}
+    @ccall jl_type_morespecific_no_subtype(A::Any, B::Any)::Cint
+end
diff --git a/test/staged.jl b/test/staged.jl
index aec4a3bf135d3..1b28144639f97 100644
--- a/test/staged.jl
+++ b/test/staged.jl
@@ -381,11 +381,17 @@ let
     @test length(ir.cfg.blocks) == 1
 end
 
+function generate_lambda_ex(world::UInt, source::LineNumberNode,
+                            argnames, spnames, @nospecialize body)
+    stub = Core.GeneratedFunctionStub(identity, Core.svec(argnames...), Core.svec(spnames...))
+    return stub(world, source, body)
+end
+
 # Test that `Core.CachedGenerator` works as expected
 struct Generator54916 <: Core.CachedGenerator end
 function (::Generator54916)(world::UInt, source::LineNumberNode, args...)
-    stub = Core.GeneratedFunctionStub(identity, Core.svec(:doit54916, :func, :arg), Core.svec())
-    return stub(world, source, :(func(arg)))
+    return generate_lambda_ex(world, source,
+        (:doit54916, :func, :arg), (), :(func(arg)))
 end
 @eval function doit54916(func, arg)
     $(Expr(:meta, :generated, Generator54916()))
@@ -412,8 +418,8 @@ function generator49715(world, source, self, f, tt)
     sig = Tuple{f, tt.parameters...}
     mi = Base._which(sig; world)
     error("oh no")
-    stub = Core.GeneratedFunctionStub(identity, Core.svec(:methodinstance, :ctx, :x, :f), Core.svec())
-    stub(world, source, :(nothing))
+    return generate_lambda_ex(world, source,
+        (:doit49715, :f, :tt), (), nothing)
 end
 @eval function doit49715(f, tt)
     $(Expr(:meta, :generated, generator49715))
@@ -426,9 +432,10 @@ function overdubbee54341(a, b)
     a + b
 end
 const overdubee_codeinfo54341 = code_lowered(overdubbee54341, Tuple{Any, Any})[1]
-function overdub_generator54341(world::UInt, source::LineNumberNode, args...)
-    if length(args) != 2
-        :(error("Wrong number of arguments"))
+function overdub_generator54341(world::UInt, source::LineNumberNode, selftype, fargtypes)
+    if length(fargtypes) != 2
+        return generate_lambda_ex(world, source,
+            (:overdub54341, :args), (), :(error("Wrong number of arguments")))
     else
         return copy(overdubee_codeinfo54341)
     end
@@ -438,3 +445,7 @@ end
     $(Expr(:meta, :generated_only))
 end
 @test overdub54341(1, 2) == 3
+# check if the inlining pass handles `nargs`/`isva` correctly
+@test first(only(code_typed((Int,Int)) do x, y; @inline overdub54341(x, y); end)) isa Core.CodeInfo
+@test first(only(code_typed((Int,)) do x; @inline overdub54341(x, 1); end)) isa Core.CodeInfo
+@test_throws "Wrong number of arguments" overdub54341(1, 2, 3)
diff --git a/test/strings/annotated.jl b/test/strings/annotated.jl
index da76f0f020531..ee53c3d5846eb 100644
--- a/test/strings/annotated.jl
+++ b/test/strings/annotated.jl
@@ -5,13 +5,22 @@
     @test str == Base.AnnotatedString(str.string, Tuple{UnitRange{Int}, Pair{Symbol, Any}}[])
     @test length(str) == 11
     @test ncodeunits(str) == 11
+    @test codeunits(str) == codeunits("some string")
+    @test codeunit(str) == UInt8
+    @test codeunit(str, 1) == codeunit("some string", 1)
+    @test firstindex(str) == firstindex("some string")
+    @test convert(Base.AnnotatedString, str) === str
     @test eltype(str) == Base.AnnotatedChar{eltype(str.string)}
     @test first(str) == Base.AnnotatedChar(first(str.string), Pair{Symbol, Any}[])
     @test str[1:4] isa SubString{typeof(str)}
     @test str[1:4] == Base.AnnotatedString("some")
+    big_byte_str = Base.AnnotatedString("आख")
+    @test_throws StringIndexError big_byte_str[5]
     @test "a" * str == Base.AnnotatedString("asome string")
     @test str * "a" == Base.AnnotatedString("some stringa")
     @test str * str == Base.AnnotatedString("some stringsome string")
+    @test cmp(str, "some stringy thingy") == -1
+    @test cmp("some stringy thingy", str) == 1
     @test str[3:4] == SubString("me")
     @test SubString("me") == str[3:4]
     Base.annotate!(str, 1:4, :thing => 0x01)
@@ -63,7 +72,12 @@ end
 
 @testset "AnnotatedChar" begin
     chr = Base.AnnotatedChar('c')
+    @test Base.AnnotatedChar(UInt32('c')) == chr
+    @test convert(Base.AnnotatedChar, chr) === chr
     @test chr == Base.AnnotatedChar(chr.char, Pair{Symbol, Any}[])
+    @test uppercase(chr) == Base.AnnotatedChar('C')
+    @test titlecase(chr) == Base.AnnotatedChar('C')
+    @test lowercase(Base.AnnotatedChar('C')) == chr
     str = Base.AnnotatedString("hmm", [(1:1, :attr => "h0h0"),
                                (1:2, :attr => "h0m1"),
                                (2:3, :attr => "m1m2")])
@@ -101,6 +115,8 @@ end
                      [(1:4, :label => 5),
                       (5:5, :label => 2),
                       (6:9, :label => 5)])
+    @test join((String(str1), str1), ' ') ==
+        Base.AnnotatedString("test test", [(6:9, :label => 5)])
     @test repeat(str1, 2) == Base.AnnotatedString("testtest", [(1:8, :label => 5)])
     @test repeat(str2, 2) == Base.AnnotatedString("casecase", [(2:3, :label => "oomph"),
                                                        (6:7, :label => "oomph")])
@@ -213,6 +229,12 @@ end
         @test write(aio2, Base.AnnotatedChar('c', [:b => 2, :c => 3, :d => 4])) == 1
         @test Base.annotations(aio2) == [(1:2, :a => 1), (1:3, :b => 2), (3:3, :c => 3), (3:3, :d => 4)]
     end
+    let aio2 = Base.AnnotatedIOBuffer()
+        @test write(aio2, Base.AnnotatedChar('a', [:b => 1])) == 1
+        @test write(aio2, Base.AnnotatedChar('b', [:a => 1, :b => 1])) == 1
+        @test read(seekstart(aio2), Base.AnnotatedString) ==
+            Base.AnnotatedString("ab", [(1:1, :b => 1), (2:2, :a => 1), (2:2, :b => 1)])
+    end
     # Working through an IOContext
     aio = Base.AnnotatedIOBuffer()
     wrapio = IOContext(aio)
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index 87d812c5bf201..874607f3c1b20 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -1235,6 +1235,8 @@ end
         @test !Core.Compiler.is_removable_if_unused(e) || (f, Ts)
     end
     @test_throws ArgumentError Symbol("a\0a")
+
+    @test Base._string_n_override == Core.Compiler.encode_effects_override(Base.compute_assumed_settings((:total, :(!:consistent))))
 end
 
 @testset "Ensure UTF-8 DFA can never leave invalid state" begin
@@ -1388,3 +1390,22 @@ end
         end
     end
 end
+
+@testset "transcode" begin
+    # string starting with an ASCII character
+    str_1 = "zβγ"
+    # string starting with a 2 byte UTF-8 character
+    str_2 = "αβγ"
+    # string starting with a 3 byte UTF-8 character
+    str_3 = "आख"
+    # string starting with a 4 byte UTF-8 character
+    str_4 = "𒃵𒃰"
+    @testset for str in (str_1, str_2, str_3, str_4)
+        @test transcode(String, str) === str
+        @test transcode(String, transcode(UInt16, str)) == str
+        @test transcode(String, transcode(UInt16, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(Int32, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(UInt32, transcode(UInt8, str))) == str
+        @test transcode(String, transcode(UInt8, transcode(UInt16, str))) == str
+    end
+end
diff --git a/test/strings/io.jl b/test/strings/io.jl
index f1fe0c24e8aea..209844580b3cd 100644
--- a/test/strings/io.jl
+++ b/test/strings/io.jl
@@ -165,6 +165,11 @@
     @test Base.escape_raw_string(raw"some\"string\\", '`') == "some\"string\\\\"
     @test Base.escape_raw_string(raw"some\"string") == "some\\\"string"
     @test Base.escape_raw_string(raw"some`string", '`') == "some\\`string"
+
+    # ascii and fullhex flags:
+    @test escape_string("\u00e4\u00f6\u00fc") == "\u00e4\u00f6\u00fc"
+    @test escape_string("\u00e4\u00f6\u00fc", ascii=true) == "\\ue4\\uf6\\ufc"
+    @test escape_string("\u00e4\u00f6\u00fc", ascii=true, fullhex=true) == "\\u00e4\\u00f6\\u00fc"
 end
 @testset "join()" begin
     @test join([]) == join([],",") == ""
@@ -339,3 +344,8 @@ end
 @testset "`string` return types" begin
     @test all(T -> T <: AbstractString, Base.return_types(string))
 end
+
+@testset "type stable `join` (#55389)" begin
+    itr = ("foo" for _ in 1:100)
+    @test Base.return_types(join, (typeof(itr),))[] == String
+end
diff --git a/test/strings/search.jl b/test/strings/search.jl
index e737096b3371d..c43327fe2971b 100644
--- a/test/strings/search.jl
+++ b/test/strings/search.jl
@@ -4,389 +4,409 @@
 astr = "Hello, world.\n"
 u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
 
-# I think these should give error on 4 also, and "" is not treated
-# consistently with SubString("",1,1), nor with Char[]
-for ind in (0, 5)
-    @test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
-    @test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
-end
+@testset "BoundsError for findnext/findprev" begin
+    # I think these should give error on 4 also, and "" is not treated
+    # consistently with SubString("",1,1), nor with Char[]
+    for ind in (0, 5)
+        @test_throws BoundsError findnext(SubString("",1,1), "foo", ind)
+        @test_throws BoundsError findprev(SubString("",1,1), "foo", ind)
+    end
 
-# Note: the commented out test will be enabled after fixes to make
-# sure that findnext/findprev are consistent
-# no matter what type of AbstractString the second argument is
-@test_throws BoundsError findnext(isequal('a'), "foo", 0)
-@test_throws BoundsError findnext(in(Char[]), "foo", 5)
-# @test_throws BoundsError findprev(in(Char[]), "foo", 0)
-@test_throws BoundsError findprev(in(Char[]), "foo", 5)
+    # Note: the commented out test will be enabled after fixes to make
+    # sure that findnext/findprev are consistent
+    # no matter what type of AbstractString the second argument is
+    @test_throws BoundsError findnext(isequal('a'), "foo", 0)
+    @test_throws BoundsError findnext(in(Char[]), "foo", 5)
+    # @test_throws BoundsError findprev(in(Char[]), "foo", 0)
+    @test_throws BoundsError findprev(in(Char[]), "foo", 5)
 
-# @test_throws ErrorException in("foobar","bar")
-@test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0)
+    # @test_throws ErrorException in("foobar","bar")
+    @test_throws BoundsError findnext(isequal(0x1),b"\x1\x2",0)
+end
 
-# ascii forward search
-for str in [astr, GenericString(astr)]
+@testset "ascii forward search $(typeof(str))" for str in [astr, GenericString(astr)]
     @test_throws BoundsError findnext(isequal('z'), str, 0)
     @test_throws BoundsError findnext(isequal('∀'), str, 0)
-    @test findfirst(isequal('x'), str) == nothing
-    @test findfirst(isequal('\0'), str) == nothing
-    @test findfirst(isequal('\u80'), str) == nothing
-    @test findfirst(isequal('∀'), str) == nothing
+    @test findfirst(isequal('x'), str) === nothing
+    @test findfirst(isequal('\0'), str) === nothing
+    @test findfirst(isequal('\u80'), str) === nothing
+    @test findfirst(isequal('∀'), str) === nothing
     @test findfirst(isequal('H'), str) == 1
     @test findfirst(isequal('l'), str) == 3
     @test findnext(isequal('l'), str, 4) == 4
     @test findnext(isequal('l'), str, 5) == 11
-    @test findnext(isequal('l'), str, 12) == nothing
+    @test findnext(isequal('l'), str, 12) === nothing
     @test findfirst(isequal(','), str) == 6
-    @test findnext(isequal(','), str, 7) == nothing
+    @test findnext(isequal(','), str, 7) === nothing
     @test findfirst(isequal('\n'), str) == 14
-    @test findnext(isequal('\n'), str, 15) == nothing
+    @test findnext(isequal('\n'), str, 15) === nothing
     @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1)
-end
 
-for str in [astr, GenericString(astr)]
     @test_throws BoundsError findnext('z', str, 0)
     @test_throws BoundsError findnext('∀', str, 0)
-    @test findfirst('x', str) == nothing
-    @test findfirst('\0', str) == nothing
-    @test findfirst('\u80', str) == nothing
-    @test findfirst('∀', str) == nothing
+    @test findfirst('x', str) === nothing
+    @test findfirst('\0', str) === nothing
+    @test findfirst('\u80', str) === nothing
+    @test findfirst('∀', str) === nothing
     @test findfirst('H', str) == 1
     @test findfirst('l', str) == 3
     @test findfirst('e', str) == 2
-    @test findfirst('u', str) == nothing
+    @test findfirst('u', str) === nothing
     @test findnext('l', str, 4) == 4
     @test findnext('l', str, 5) == 11
-    @test findnext('l', str, 12) == nothing
+    @test findnext('l', str, 12) === nothing
     @test findfirst(',', str) == 6
-    @test findnext(',', str, 7) == nothing
+    @test findnext(',', str, 7) === nothing
     @test findfirst('\n', str) == 14
-    @test findnext('\n', str, 15) == nothing
+    @test findnext('\n', str, 15) === nothing
     @test_throws BoundsError findnext('ε', str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext('a', str, nextind(str,lastindex(str))+1)
 end
 
-# ascii backward search
-for str in [astr]
-    @test findlast(isequal('x'), str) == nothing
-    @test findlast(isequal('\0'), str) == nothing
-    @test findlast(isequal('\u80'), str) == nothing
-    @test findlast(isequal('∀'), str) == nothing
+@testset "ascii backward search" begin
+    str = astr
+    @test findlast(isequal('x'), str) === nothing
+    @test findlast(isequal('\0'), str) === nothing
+    @test findlast(isequal('\u80'), str) === nothing
+    @test findlast(isequal('∀'), str) === nothing
     @test findlast(isequal('H'), str) == 1
-    @test findprev(isequal('H'), str, 0) == nothing
+    @test findprev(isequal('H'), str, 0) === nothing
     @test findlast(isequal('l'), str) == 11
     @test findprev(isequal('l'), str, 5) == 4
     @test findprev(isequal('l'), str, 4) == 4
     @test findprev(isequal('l'), str, 3) == 3
-    @test findprev(isequal('l'), str, 2) == nothing
+    @test findprev(isequal('l'), str, 2) === nothing
     @test findlast(isequal(','), str) == 6
-    @test findprev(isequal(','), str, 5) == nothing
+    @test findprev(isequal(','), str, 5) === nothing
     @test findlast(isequal('\n'), str) == 14
-end
 
-for str in [astr]
-    @test findlast('x', str) == nothing
-    @test findlast('\0', str) == nothing
-    @test findlast('\u80', str) == nothing
-    @test findlast('∀', str) == nothing
+    @test findlast('x', str) === nothing
+    @test findlast('\0', str) === nothing
+    @test findlast('\u80', str) === nothing
+    @test findlast('∀', str) === nothing
     @test findlast('H', str) == 1
-    @test findprev('H', str, 0) == nothing
+    @test findprev('H', str, 0) === nothing
     @test findlast('l', str) == 11
     @test findprev('l', str, 5) == 4
     @test findprev('l', str, 4) == 4
     @test findprev('l', str, 3) == 3
-    @test findprev('l', str, 2) == nothing
+    @test findprev('l', str, 2) === nothing
     @test findlast(',', str) == 6
-    @test findprev(',', str, 5) == nothing
-    @test findlast(str, "") == nothing
-    @test findlast(str^2, str) == nothing
+    @test findprev(',', str, 5) === nothing
+    @test findlast(str, "") === nothing
+    @test findlast(str^2, str) === nothing
     @test findlast('\n', str) == 14
 end
 
-# utf-8 forward search
-for str in (u8str, GenericString(u8str))
+@testset "utf-8 forward search $(typeof(str))" for str in (u8str, GenericString(u8str))
     @test_throws BoundsError findnext(isequal('z'), str, 0)
     @test_throws BoundsError findnext(isequal('∀'), str, 0)
-    @test findfirst(isequal('z'), str) == nothing
-    @test findfirst(isequal('\0'), str) == nothing
-    @test findfirst(isequal('\u80'), str) == nothing
-    @test findfirst(isequal('∄'), str) == nothing
+    @test findfirst(isequal('z'), str) === nothing
+    @test findfirst(isequal('\0'), str) === nothing
+    @test findfirst(isequal('\u80'), str) === nothing
+    @test findfirst(isequal('∄'), str) === nothing
     @test findfirst(isequal('∀'), str) == 1
     @test_throws StringIndexError findnext(isequal('∀'), str, 2)
-    @test findnext(isequal('∀'), str, 4) == nothing
+    @test findnext(isequal('∀'), str, 4) === nothing
     @test findfirst(isequal('∃'), str) == 13
     @test_throws StringIndexError findnext(isequal('∃'), str, 15)
-    @test findnext(isequal('∃'), str, 16) == nothing
+    @test findnext(isequal('∃'), str, 16) === nothing
     @test findfirst(isequal('x'), str) == 26
     @test findnext(isequal('x'), str, 27) == 43
-    @test findnext(isequal('x'), str, 44) == nothing
+    @test findnext(isequal('x'), str, 44) === nothing
     @test findfirst(isequal('δ'), str) == 17
     @test_throws StringIndexError findnext(isequal('δ'), str, 18)
     @test findnext(isequal('δ'), str, nextind(str,17)) == 33
-    @test findnext(isequal('δ'), str, nextind(str,33)) == nothing
+    @test findnext(isequal('δ'), str, nextind(str,33)) === nothing
     @test findfirst(isequal('ε'), str) == 5
     @test findnext(isequal('ε'), str, nextind(str,5)) == 54
-    @test findnext(isequal('ε'), str, nextind(str,54)) == nothing
-    @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) == nothing
-    @test findnext(isequal('a'), str, nextind(str,lastindex(str))) == nothing
+    @test findnext(isequal('ε'), str, nextind(str,54)) === nothing
+    @test findnext(isequal('ε'), str, nextind(str,lastindex(str))) === nothing
+    @test findnext(isequal('a'), str, nextind(str,lastindex(str))) === nothing
     @test_throws BoundsError findnext(isequal('ε'), str, nextind(str,lastindex(str))+1)
     @test_throws BoundsError findnext(isequal('a'), str, nextind(str,lastindex(str))+1)
 end
 
-# utf-8 backward search
-for str in [u8str]
-    @test findlast(isequal('z'), str) == nothing
-    @test findlast(isequal('\0'), str) == nothing
-    @test findlast(isequal('\u80'), str) == nothing
-    @test findlast(isequal('∄'), str) == nothing
+@testset "utf-8 backward search" begin
+    str = u8str
+    @test findlast(isequal('z'), str) === nothing
+    @test findlast(isequal('\0'), str) === nothing
+    @test findlast(isequal('\u80'), str) === nothing
+    @test findlast(isequal('∄'), str) === nothing
     @test findlast(isequal('∀'), str) == 1
-    @test findprev(isequal('∀'), str, 0) == nothing
+    @test findprev(isequal('∀'), str, 0) === nothing
     @test findlast(isequal('∃'), str) == 13
     @test findprev(isequal('∃'), str, 14) == 13
     @test findprev(isequal('∃'), str, 13) == 13
-    @test findprev(isequal('∃'), str, 12) == nothing
+    @test findprev(isequal('∃'), str, 12) === nothing
     @test findlast(isequal('x'), str) == 43
     @test findprev(isequal('x'), str, 42) == 26
-    @test findprev(isequal('x'), str, 25) == nothing
+    @test findprev(isequal('x'), str, 25) === nothing
     @test findlast(isequal('δ'), str) == 33
     @test findprev(isequal('δ'), str, 32) == 17
-    @test findprev(isequal('δ'), str, 16) == nothing
+    @test findprev(isequal('δ'), str, 16) === nothing
     @test findlast(isequal('ε'), str) == 54
     @test findprev(isequal('ε'), str, 53) == 5
-    @test findprev(isequal('ε'), str, 4) == nothing
+    @test findprev(isequal('ε'), str, 4) === nothing
 end
 
-# string forward search with a single-char string
-@test findfirst("x", astr) == nothing
-@test findfirst("H", astr) == 1:1
-@test findnext("H", astr, 2) == nothing
-@test findfirst("l", astr) == 3:3
-@test findnext("l", astr, 4) == 4:4
-@test findnext("l", astr, 5) == 11:11
-@test findnext("l", astr, 12) == nothing
-@test findfirst("\n", astr) == 14:14
-@test findnext("\n", astr, 15) == nothing
-
-@test findfirst("z", u8str) == nothing
-@test findfirst("∄", u8str) == nothing
-@test findfirst("∀", u8str) == 1:1
-@test findnext("∀", u8str, 4) == nothing
-@test findfirst("∃", u8str) == 13:13
-@test findnext("∃", u8str, 16) == nothing
-@test findfirst("x", u8str) == 26:26
-@test findnext("x", u8str, 27) == 43:43
-@test findnext("x", u8str, 44) == nothing
-@test findfirst("ε", u8str) == 5:5
-@test findnext("ε", u8str, 7) == 54:54
-@test findnext("ε", u8str, 56) == nothing
-
-# strifindprev  backward search with a single-char string
-@test findlast("x", astr) == nothing
-@test findlast("H", astr) == 1:1
-@test findprev("H", astr, 2) == 1:1
-@test findprev("H", astr, 0) == nothing
-@test findlast("l", astr) == 11:11
-@test findprev("l", astr, 10) == 4:4
-@test findprev("l", astr, 4) == 4:4
-@test findprev("l", astr, 3) == 3:3
-@test findprev("l", astr, 2) == nothing
-@test findlast("\n", astr) == 14:14
-@test findprev("\n", astr, 13) == nothing
-
-@test findlast("z", u8str) == nothing
-@test findlast("∄", u8str) == nothing
-@test findlast("∀", u8str) == 1:1
-@test findprev("∀", u8str, 0) == nothing
-#TODO: setting the limit in the middle of a wide char
-#      makes findnext fail but findprev succeed.
-#      Should findprev fail as well?
-#@test findprev("∀", u8str, 2) == nothing # gives 1:3
-@test findlast("∃", u8str) == 13:13
-@test findprev("∃", u8str, 12) == nothing
-@test findlast("x", u8str) == 43:43
-@test findprev("x", u8str, 42) == 26:26
-@test findprev("x", u8str, 25) == nothing
-@test findlast("ε", u8str) == 54:54
-@test findprev("ε", u8str, 53) == 5:5
-@test findprev("ε", u8str, 4) == nothing
-
-# string forward search with a single-char regex
-@test findfirst(r"x", astr) == nothing
-@test findfirst(r"H", astr) == 1:1
-@test findnext(r"H", astr, 2) == nothing
-@test findfirst(r"l", astr) == 3:3
-@test findnext(r"l", astr, 4) == 4:4
-@test findnext(r"l", astr, 5) == 11:11
-@test findnext(r"l", astr, 12) == nothing
-@test findfirst(r"\n", astr) == 14:14
-@test findnext(r"\n", astr, 15) == nothing
-@test findfirst(r"z", u8str) == nothing
-@test findfirst(r"∄", u8str) == nothing
-@test findfirst(r"∀", u8str) == 1:1
-@test findnext(r"∀", u8str, 4) == nothing
-@test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str)
-@test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4)
-@test findfirst(r"∃", u8str) == 13:13
-@test findnext(r"∃", u8str, 16) == nothing
-@test findfirst(r"x", u8str) == 26:26
-@test findnext(r"x", u8str, 27) == 43:43
-@test findnext(r"x", u8str, 44) == nothing
-@test findfirst(r"ε", u8str) == 5:5
-@test findnext(r"ε", u8str, 7) == 54:54
-@test findnext(r"ε", u8str, 56) == nothing
-for i = 1:lastindex(astr)
-    @test findnext(r"."s, astr, i) == i:i
+@testset "string forward search with a single-char string" begin
+    @test findfirst("x", astr) === nothing
+    @test findfirst("H", astr) == 1:1
+    @test findnext("H", astr, 2) === nothing
+    @test findfirst("l", astr) == 3:3
+    @test findnext("l", astr, 4) == 4:4
+    @test findnext("l", astr, 5) == 11:11
+    @test findnext("l", astr, 12) === nothing
+    @test findfirst("\n", astr) == 14:14
+    @test findnext("\n", astr, 15) === nothing
+
+    @test findfirst("z", u8str) === nothing
+    @test findfirst("∄", u8str) === nothing
+    @test findfirst("∀", u8str) == 1:1
+    @test findnext("∀", u8str, 4) === nothing
+    @test findfirst("∃", u8str) == 13:13
+    @test findnext("∃", u8str, 16) === nothing
+    @test findfirst("x", u8str) == 26:26
+    @test findnext("x", u8str, 27) == 43:43
+    @test findnext("x", u8str, 44) === nothing
+    @test findfirst("ε", u8str) == 5:5
+    @test findnext("ε", u8str, 7) == 54:54
+    @test findnext("ε", u8str, 56) === nothing
 end
-for i = 1:lastindex(u8str)
-    if isvalid(u8str,i)
-        @test findnext(r"."s, u8str, i) == i:i
+
+@testset "findprev backward search with a single-char string" begin
+    @test findlast("x", astr) === nothing
+    @test findlast("H", astr) == 1:1
+    @test findprev("H", astr, 2) == 1:1
+    @test findprev("H", astr, 0) === nothing
+    @test findlast("l", astr) == 11:11
+    @test findprev("l", astr, 10) == 4:4
+    @test findprev("l", astr, 4) == 4:4
+    @test findprev("l", astr, 3) == 3:3
+    @test findprev("l", astr, 2) === nothing
+    @test findlast("\n", astr) == 14:14
+    @test findprev("\n", astr, 13) === nothing
+
+    @test findlast("z", u8str) === nothing
+    @test findlast("∄", u8str) === nothing
+    @test findlast("∀", u8str) == 1:1
+    @test findprev("∀", u8str, 0) === nothing
+    #TODO: setting the limit in the middle of a wide char
+    #      makes findnext fail but findprev succeed.
+    #      Should findprev fail as well?
+    #@test findprev("∀", u8str, 2) === nothing # gives 1:3
+    @test findlast("∃", u8str) == 13:13
+    @test findprev("∃", u8str, 12) === nothing
+    @test findlast("x", u8str) == 43:43
+    @test findprev("x", u8str, 42) == 26:26
+    @test findprev("x", u8str, 25) === nothing
+    @test findlast("ε", u8str) == 54:54
+    @test findprev("ε", u8str, 53) == 5:5
+    @test findprev("ε", u8str, 4) === nothing
+end
+
+@testset "string forward search with a single-char regex" begin
+    @test findfirst(r"x", astr) === nothing
+    @test findfirst(r"H", astr) == 1:1
+    @test findnext(r"H", astr, 2) === nothing
+    @test findfirst(r"l", astr) == 3:3
+    @test findnext(r"l", astr, 4) == 4:4
+    @test findnext(r"l", astr, 5) == 11:11
+    @test findnext(r"l", astr, 12) === nothing
+    @test findfirst(r"\n", astr) == 14:14
+    @test findnext(r"\n", astr, 15) === nothing
+    @test findfirst(r"z", u8str) === nothing
+    @test findfirst(r"∄", u8str) === nothing
+    @test findfirst(r"∀", u8str) == 1:1
+    @test findnext(r"∀", u8str, 4) === nothing
+    @test findfirst(r"∀", u8str) == findfirst(r"\u2200", u8str)
+    @test findnext(r"∀", u8str, 4) == findnext(r"\u2200", u8str, 4)
+    @test findfirst(r"∃", u8str) == 13:13
+    @test findnext(r"∃", u8str, 16) === nothing
+    @test findfirst(r"x", u8str) == 26:26
+    @test findnext(r"x", u8str, 27) == 43:43
+    @test findnext(r"x", u8str, 44) === nothing
+    @test findfirst(r"ε", u8str) == 5:5
+    @test findnext(r"ε", u8str, 7) == 54:54
+    @test findnext(r"ε", u8str, 56) === nothing
+    for i = 1:lastindex(astr)
+        @test findnext(r"."s, astr, i) == i:i
+    end
+    for i = 1:lastindex(u8str)
+        if isvalid(u8str,i)
+            @test findnext(r"."s, u8str, i) == i:i
+        end
     end
 end
 
-# string forward search with a zero-char string
-for i = 1:lastindex(astr)
-    @test findnext("", astr, i) == i:i-1
+@testset "string forward search with a zero-char string" begin
+    for i = 1:lastindex(astr)
+        @test findnext("", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        @test findnext("", u8str, i) == i:i-1
+    end
+    @test findfirst("", "") === 1:0
 end
-for i = 1:lastindex(u8str)
-    @test findnext("", u8str, i) == i:i-1
+
+@testset "string backward search with a zero-char string" begin
+    for i = 1:lastindex(astr)
+        @test findprev("", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        @test findprev("", u8str, i) == i:i-1
+    end
+    @test findlast("", "") === 1:0
 end
-@test findfirst("", "") === 1:0
 
-# string backward search with a zero-char string
-for i = 1:lastindex(astr)
-    @test findprev("", astr, i) == i:i-1
+@testset "string forward search with a zero-char regex" begin
+    for i = 1:lastindex(astr)
+        @test findnext(r"", astr, i) == i:i-1
+    end
+    for i = 1:lastindex(u8str)
+        # TODO: should regex search fast-forward invalid indices?
+        if isvalid(u8str,i)
+            @test findnext(r"", u8str, i) == i:i-1
+        end
+    end
 end
-for i = 1:lastindex(u8str)
-    @test findprev("", u8str, i) == i:i-1
+
+# See the comments in #54579
+@testset "Search for invalid chars" begin
+    @test findfirst(==('\xff'), "abc\xffde") == 4
+    @test findprev(isequal('\xa6'), "abc\xa69", 5) == 4
+    @test isnothing(findfirst(==('\xff'), "abcdeæd"))
+
+    @test isnothing(findnext(==('\xa6'), "æ", 1))
+    @test isnothing(findprev(==('\xa6'), "æa", 2))
 end
-@test findlast("", "") === 1:0
 
-# string forward search with a zero-char regex
-for i = 1:lastindex(astr)
-    @test findnext(r"", astr, i) == i:i-1
+@testset "string forward search with a two-char string literal" begin
+    @test findfirst("xx", "foo,bar,baz") === nothing
+    @test findfirst("fo", "foo,bar,baz") == 1:2
+    @test findnext("fo", "foo,bar,baz", 3) === nothing
+    @test findfirst("oo", "foo,bar,baz") == 2:3
+    @test findnext("oo", "foo,bar,baz", 4) === nothing
+    @test findfirst("o,", "foo,bar,baz") == 3:4
+    @test findnext("o,", "foo,bar,baz", 5) === nothing
+    @test findfirst(",b", "foo,bar,baz") == 4:5
+    @test findnext(",b", "foo,bar,baz", 6) == 8:9
+    @test findnext(",b", "foo,bar,baz", 10) === nothing
+    @test findfirst("az", "foo,bar,baz") == 10:11
+    @test findnext("az", "foo,bar,baz", 12) === nothing
 end
-for i = 1:lastindex(u8str)
-    # TODO: should regex search fast-forward invalid indices?
-    if isvalid(u8str,i)
-        @test findnext(r"", u8str, i) == i:i-1
-    end
+
+@testset "issue #9365" begin
+    # string forward search with a two-char UTF-8 (2 byte) string literal
+    @test findfirst("éé", "ééé") == 1:3
+    @test findnext("éé", "ééé", 1) == 1:3
+    # string forward search with a two-char UTF-8 (3 byte) string literal
+    @test findfirst("€€", "€€€") == 1:4
+    @test findnext("€€", "€€€", 1) == 1:4
+    # string forward search with a two-char UTF-8 (4 byte) string literal
+    @test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5
+    @test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5
+
+    # string forward search with a two-char UTF-8 (2 byte) string literal
+    @test findfirst("éé", "éé") == 1:3
+    @test findnext("éé", "éé", 1) == 1:3
+    # string forward search with a two-char UTF-8 (3 byte) string literal
+    @test findfirst("€€", "€€") == 1:4
+    @test findnext("€€", "€€", 1) == 1:4
+    # string forward search with a two-char UTF-8 (4 byte) string literal
+    @test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
+    @test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
+
+    # string backward search with a two-char UTF-8 (2 byte) string literal
+    @test findlast("éé", "ééé") == 3:5
+    @test findprev("éé", "ééé", lastindex("ééé")) == 3:5
+    # string backward search with a two-char UTF-8 (3 byte) string literal
+    @test findlast("€€", "€€€") == 4:7
+    @test findprev("€€", "€€€", lastindex("€€€")) == 4:7
+    # string backward search with a two-char UTF-8 (4 byte) string literal
+    @test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9
+    @test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9
+
+    # string backward search with a two-char UTF-8 (2 byte) string literal
+    @test findlast("éé", "éé") == 1:3        # should really be 1:4!
+    @test findprev("éé", "éé", lastindex("ééé")) == 1:3
+    # string backward search with a two-char UTF-8 (3 byte) string literal
+    @test findlast("€€", "€€") == 1:4        # should really be 1:6!
+    @test findprev("€€", "€€", lastindex("€€€")) == 1:4
+    # string backward search with a two-char UTF-8 (4 byte) string literal
+    @test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
+    @test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5
+end
+
+@testset "string backward search with a two-char string literal" begin
+    @test findlast("xx", "foo,bar,baz") === nothing
+    @test findlast("fo", "foo,bar,baz") == 1:2
+    @test findprev("fo", "foo,bar,baz", 1) === nothing
+    @test findlast("oo", "foo,bar,baz") == 2:3
+    @test findprev("oo", "foo,bar,baz", 2) === nothing
+    @test findlast("o,", "foo,bar,baz") == 3:4
+    @test findprev("o,", "foo,bar,baz", 1) === nothing
+    @test findlast(",b", "foo,bar,baz") == 8:9
+    @test findprev(",b", "foo,bar,baz", 6) == 4:5
+    @test findprev(",b", "foo,bar,baz", 3) === nothing
+    @test findlast("az", "foo,bar,baz") == 10:11
+    @test findprev("az", "foo,bar,baz", 10) === nothing
 end
 
-# string forward search with a two-char string literal
-@test findfirst("xx", "foo,bar,baz") == nothing
-@test findfirst("fo", "foo,bar,baz") == 1:2
-@test findnext("fo", "foo,bar,baz", 3) == nothing
-@test findfirst("oo", "foo,bar,baz") == 2:3
-@test findnext("oo", "foo,bar,baz", 4) == nothing
-@test findfirst("o,", "foo,bar,baz") == 3:4
-@test findnext("o,", "foo,bar,baz", 5) == nothing
-@test findfirst(",b", "foo,bar,baz") == 4:5
-@test findnext(",b", "foo,bar,baz", 6) == 8:9
-@test findnext(",b", "foo,bar,baz", 10) == nothing
-@test findfirst("az", "foo,bar,baz") == 10:11
-@test findnext("az", "foo,bar,baz", 12) == nothing
-
-# issue #9365
-# string forward search with a two-char UTF-8 (2 byte) string literal
-@test findfirst("éé", "ééé") == 1:3
-@test findnext("éé", "ééé", 1) == 1:3
-# string forward search with a two-char UTF-8 (3 byte) string literal
-@test findfirst("€€", "€€€") == 1:4
-@test findnext("€€", "€€€", 1) == 1:4
-# string forward search with a two-char UTF-8 (4 byte) string literal
-@test findfirst("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 1:5
-@test findnext("\U1f596\U1f596", "\U1f596\U1f596\U1f596", 1) == 1:5
-
-# string forward search with a two-char UTF-8 (2 byte) string literal
-@test findfirst("éé", "éé") == 1:3
-@test findnext("éé", "éé", 1) == 1:3
-# string forward search with a two-char UTF-8 (3 byte) string literal
-@test findfirst("€€", "€€") == 1:4
-@test findnext("€€", "€€", 1) == 1:4
-# string forward search with a two-char UTF-8 (4 byte) string literal
-@test findfirst("\U1f596\U1f596", "\U1f596\U1f596") == 1:5
-@test findnext("\U1f596\U1f596", "\U1f596\U1f596", 1) == 1:5
-
-# string backward search with a two-char UTF-8 (2 byte) string literal
-@test findlast("éé", "ééé") == 3:5
-@test findprev("éé", "ééé", lastindex("ééé")) == 3:5
-# string backward search with a two-char UTF-8 (3 byte) string literal
-@test findlast("€€", "€€€") == 4:7
-@test findprev("€€", "€€€", lastindex("€€€")) == 4:7
-# string backward search with a two-char UTF-8 (4 byte) string literal
-@test findlast("\U1f596\U1f596", "\U1f596\U1f596\U1f596") == 5:9
-@test findprev("\U1f596\U1f596", "\U1f596\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 5:9
-
-# string backward search with a two-char UTF-8 (2 byte) string literal
-@test findlast("éé", "éé") == 1:3        # should really be 1:4!
-@test findprev("éé", "éé", lastindex("ééé")) == 1:3
-# string backward search with a two-char UTF-8 (3 byte) string literal
-@test findlast("€€", "€€") == 1:4        # should really be 1:6!
-@test findprev("€€", "€€", lastindex("€€€")) == 1:4
-# string backward search with a two-char UTF-8 (4 byte) string literal
-@test findlast("\U1f596\U1f596", "\U1f596\U1f596") == 1:5        # should really be 1:8!
-@test findprev("\U1f596\U1f596", "\U1f596\U1f596", lastindex("\U1f596\U1f596\U1f596")) == 1:5
-
-# string backward search with a two-char string literal
-@test findlast("xx", "foo,bar,baz") == nothing
-@test findlast("fo", "foo,bar,baz") == 1:2
-@test findprev("fo", "foo,bar,baz", 1) == nothing
-@test findlast("oo", "foo,bar,baz") == 2:3
-@test findprev("oo", "foo,bar,baz", 2) == nothing
-@test findlast("o,", "foo,bar,baz") == 3:4
-@test findprev("o,", "foo,bar,baz", 1) == nothing
-@test findlast(",b", "foo,bar,baz") == 8:9
-@test findprev(",b", "foo,bar,baz", 6) == 4:5
-@test findprev(",b", "foo,bar,baz", 3) == nothing
-@test findlast("az", "foo,bar,baz") == 10:11
-@test findprev("az", "foo,bar,baz", 10) == nothing
-
-# string search with a two-char regex
-@test findfirst(r"xx", "foo,bar,baz") == nothing
-@test findfirst(r"fo", "foo,bar,baz") == 1:2
-@test findnext(r"fo", "foo,bar,baz", 3) == nothing
-@test findfirst(r"oo", "foo,bar,baz") == 2:3
-@test findnext(r"oo", "foo,bar,baz", 4) == nothing
-@test findfirst(r"o,", "foo,bar,baz") == 3:4
-@test findnext(r"o,", "foo,bar,baz", 5) == nothing
-@test findfirst(r",b", "foo,bar,baz") == 4:5
-@test findnext(r",b", "foo,bar,baz", 6) == 8:9
-@test findnext(r",b", "foo,bar,baz", 10) == nothing
-@test findfirst(r"az", "foo,bar,baz") == 10:11
-@test findnext(r"az", "foo,bar,baz", 12) == nothing
-
-# occursin with a String and Char needle
-@test occursin("o", "foo")
-@test occursin('o', "foo")
-# occursin in curried form
-@test occursin("foo")("o")
-@test occursin("foo")('o')
-
-# contains
-@test contains("foo", "o")
-@test contains("foo", 'o')
-# contains in curried form
-@test contains("o")("foo")
-@test contains('o')("foo")
-
-@test_throws ErrorException "ab" ∈ "abc"
-
-# issue #15723
-@test findfirst(isequal('('), "⨳(") == 4
-@test findnext(isequal('('), "(⨳(", 2) == 5
-@test findlast(isequal('('), "(⨳(") == 5
-@test findprev(isequal('('), "(⨳(", 2) == 1
-
-@test @inferred findall(isequal('a'), "éa") == [3]
-@test @inferred findall(isequal('€'), "€€") == [1, 4]
-@test @inferred isempty(findall(isequal('é'), ""))
-
-# issue #18109
-s_18109 = "fooα🐨βcd3"
-@test findlast(isequal('o'), s_18109) == 3
-@test findfirst(isequal('d'), s_18109) == 13
-
-# findall (issue #31788)
-@testset "findall" begin
+@testset "string search with a two-char regex" begin
+    @test findfirst(r"xx", "foo,bar,baz") === nothing
+    @test findfirst(r"fo", "foo,bar,baz") == 1:2
+    @test findnext(r"fo", "foo,bar,baz", 3) === nothing
+    @test findfirst(r"oo", "foo,bar,baz") == 2:3
+    @test findnext(r"oo", "foo,bar,baz", 4) === nothing
+    @test findfirst(r"o,", "foo,bar,baz") == 3:4
+    @test findnext(r"o,", "foo,bar,baz", 5) === nothing
+    @test findfirst(r",b", "foo,bar,baz") == 4:5
+    @test findnext(r",b", "foo,bar,baz", 6) == 8:9
+    @test findnext(r",b", "foo,bar,baz", 10) === nothing
+    @test findfirst(r"az", "foo,bar,baz") == 10:11
+    @test findnext(r"az", "foo,bar,baz", 12) === nothing
+end
+
+@testset "occursin/contains" begin
+    # occursin with a String and Char needle
+    @test occursin("o", "foo")
+    @test occursin('o', "foo")
+    # occursin in curried form
+    @test occursin("foo")("o")
+    @test occursin("foo")('o')
+
+    # contains
+    @test contains("foo", "o")
+    @test contains("foo", 'o')
+    # contains in curried form
+    @test contains("o")("foo")
+    @test contains('o')("foo")
+
+    @test_throws ErrorException "ab" ∈ "abc"
+end
+
+@testset "issue #15723" begin
+    @test findfirst(isequal('('), "⨳(") == 4
+    @test findnext(isequal('('), "(⨳(", 2) == 5
+    @test findlast(isequal('('), "(⨳(") == 5
+    @test findprev(isequal('('), "(⨳(", 2) == 1
+
+    @test @inferred findall(isequal('a'), "éa") == [3]
+    @test @inferred findall(isequal('€'), "€€") == [1, 4]
+    @test @inferred isempty(findall(isequal('é'), ""))
+end
+
+
+@testset "issue #18109" begin
+    s_18109 = "fooα🐨βcd3"
+    @test findlast(isequal('o'), s_18109) == 3
+    @test findfirst(isequal('d'), s_18109) == 13
+end
+
+@testset "findall (issue #31788)" begin
     @test findall("fooo", "foo") == UnitRange{Int}[]
     @test findall("ing", "Spinning laughing dancing") == [6:8, 15:17, 23:25]
     @test all(findall("", "foo") .=== [1:0, 2:1, 3:2, 4:3]) # use === to compare empty ranges
@@ -417,7 +437,7 @@ end
         A = T[0x40, 0x52, 0x00, 0x52, 0x00]
 
         for A in (A, @view(A[1:end]), codeunits(String(copyto!(Vector{UInt8}(undef,5), A))))
-            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) == nothing
+            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) === nothing
             @test findfirst(VT[0x52], A) === 2:2
             @test findfirst(==(VT(0x52)), A) === 2
             @test findlast(VT[0x30], A) === findlast(==(VT(0x30)), A) === nothing
@@ -445,6 +465,45 @@ end
             @test_throws BoundsError findprev(pattern, A, -3)
         end
     end
+
+    @test findall([0x01, 0x02], [0x03, 0x01, 0x02, 0x01, 0x02, 0x06]) == [2:3, 4:5]
+    @test isempty(findall([0x04, 0x05], [0x03, 0x04, 0x06]))
+end
+
+# Issue 54578
+@testset "No conflation of Int8 and UInt8" begin
+    # Work for mixed types if the values are the same
+    @test findfirst(==(Int8(1)), [0x01]) == 1
+    @test findnext(iszero, Int8[0, -2, 0, -3], 2) == 3
+    @test findfirst(Int8[1,4], UInt8[0, 2, 4, 1, 8, 1, 4, 2]) == 6:7
+    @test findprev(UInt8[5, 6], Int8[1, 9, 2, 5, 6, 3], 6) == 4:5
+
+    # Returns nothing for the same methods if the values are different,
+    # even if the bitpatterns are the same
+    @test isnothing(findfirst(==(Int8(-1)), [0xff]))
+    @test isnothing(findnext(isequal(0xff), Int8[-1, -2, -1], 2))
+    @test isnothing(findfirst(UInt8[0xff, 0xfe], Int8[0, -1, -2, 1, 8, 1, 4, 2]))
+    @test isnothing(findprev(UInt8[0xff, 0xfe], Int8[1, 9, 2, -1, -2, 3], 6))
+end
+
+@testset "DenseArray with offsets" begin
+    isdefined(Main, :OffsetDenseArrays) || @eval Main include("../testhelpers/OffsetDenseArrays.jl")
+    OffsetDenseArrays = Main.OffsetDenseArrays
+
+    A = OffsetDenseArrays.OffsetDenseArray(collect(0x61:0x69), 100)
+    @test findfirst(==(0x61), A) == 101
+    @test findlast(==(0x61), A) == 101
+    @test findfirst(==(0x00), A) === nothing
+
+    @test findfirst([0x62, 0x63, 0x64], A) == 102:104
+    @test findlast([0x63, 0x64], A) == 103:104
+    @test findall([0x62, 0x63], A) == [102:103]
+
+    @test findfirst(iszero, A) === nothing
+    A = OffsetDenseArrays.OffsetDenseArray([0x01, 0x02, 0x00, 0x03], -100)
+    @test findfirst(iszero, A) == -97
+    @test findnext(==(0x02), A, -99) == -98
+    @test findnext(==(0x02), A, -97) === nothing
 end
 
 # issue 32568
diff --git a/test/strings/types.jl b/test/strings/types.jl
index 771be253b1ec9..c09652c3a608d 100644
--- a/test/strings/types.jl
+++ b/test/strings/types.jl
@@ -2,196 +2,211 @@
 
 ## SubString and Cstring tests ##
 
-## SubString tests ##
-u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
-u8str2 = u8str^2
-len_u8str = length(u8str)
-slen_u8str = length(u8str)
-len_u8str2 = length(u8str2)
-slen_u8str2 = length(u8str2)
-
-@test len_u8str2 == 2 * len_u8str
-@test slen_u8str2 == 2 * slen_u8str
-
-u8str2plain = String(u8str2)
-
-for i1 = 1:length(u8str2)
-    if !isvalid(u8str2, i1); continue; end
-    for i2 = i1:length(u8str2)
-        if !isvalid(u8str2, i2); continue; end
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
-        @test u8str2[i1:i2] == u8str2plain[i1:i2]
+@testset "SubString" begin
+    u8str = "∀ ε > 0, ∃ δ > 0: |x-y| < δ ⇒ |f(x)-f(y)| < ε"
+    u8str2 = u8str^2
+    len_u8str = length(u8str)
+    slen_u8str = length(u8str)
+    len_u8str2 = length(u8str2)
+    slen_u8str2 = length(u8str2)
+
+    @test len_u8str2 == 2 * len_u8str
+    @test slen_u8str2 == 2 * slen_u8str
+
+    u8str2plain = String(u8str2)
+    @test !isascii(u8str2)
+    @test cmp(u8str2, u8str^3) == -1
+    @test cmp(u8str2, u8str2)  == 0
+    @test cmp(u8str^3, u8str2) == 1
+    @test codeunit(u8str2) == codeunit(u8str2plain)
+
+    @test convert(Union{String, SubString{String}}, u8str2)      === u8str2
+    @test convert(Union{String, SubString{String}}, u8str2plain) === u8str2plain
+
+    for i1 = 1:ncodeunits(u8str2)
+        if !isvalid(u8str2, i1); continue; end
+        for i2 = i1:ncodeunits(u8str2)
+            if !isvalid(u8str2, i2); continue; end
+            @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
+            @test length(u8str2[i1:i2]) == length(u8str2plain[i1:i2])
+            @test u8str2[i1:i2] == u8str2plain[i1:i2]
+        end
     end
-end
 
-# tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes
-# gives the same result as `getindex` (except that it is a view not a copy)
-for idx in 0:1
-    @test SubString("∀", 1, idx) == "∀"[1:idx]
-end
+    # tests that SubString of a single multibyte `Char` string, like "∀" which takes 3 bytes
+    # gives the same result as `getindex` (except that it is a view not a copy)
+    for idx in 0:1
+        @test SubString("∀", 1, idx) == "∀"[1:idx]
+    end
 
-# Substring provided with invalid end index throws BoundsError
-@test_throws StringIndexError SubString("∀", 1, 2)
-@test_throws StringIndexError SubString("∀", 1, 3)
-@test_throws BoundsError SubString("∀", 1, 4)
-
-# Substring provided with invalid start index throws BoundsError
-@test SubString("∀∀", 1:1) == "∀"
-@test SubString("∀∀", 1:4) == "∀∀"
-@test SubString("∀∀", 4:4) == "∀"
-@test_throws StringIndexError SubString("∀∀", 1:2)
-@test_throws StringIndexError SubString("∀∀", 1:5)
-@test_throws StringIndexError SubString("∀∀", 2:4)
-@test_throws BoundsError SubString("∀∀", 0:1)
-@test_throws BoundsError SubString("∀∀", 0:4)
-@test_throws BoundsError SubString("∀∀", 1:7)
-@test_throws BoundsError SubString("∀∀", 4:7)
-
-# tests for SubString of more than one multibyte `Char` string
-# we are consistent with `getindex` for `String`
-for idx in [0, 1, 4]
-    @test SubString("∀∀", 1, idx) == "∀∀"[1:idx]
-    @test SubString("∀∀", 4, idx) == "∀∀"[4:idx]
-end
+    @testset "invalid end index" begin
+        # Substring provided with invalid end index throws BoundsError
+        @test_throws StringIndexError SubString("∀", 1, 2)
+        @test_throws StringIndexError SubString("∀", 1, 3)
+        @test_throws BoundsError SubString("∀", 1, 4)
+    end
 
-# index beyond lastindex("∀∀")
-for idx in [2:3; 5:6]
-    @test_throws StringIndexError SubString("∀∀", 1, idx)
-end
-for idx in 7:8
-    @test_throws BoundsError SubString("∀∀", 1, idx)
-end
+    @testset "invalid start index" begin
+        # Substring provided with invalid start index throws BoundsError
+        @test SubString("∀∀", 1:1) == "∀"
+        @test SubString("∀∀", 1:4) == "∀∀"
+        @test SubString("∀∀", 4:4) == "∀"
+        @test_throws StringIndexError SubString("∀∀", 1:2)
+        @test_throws StringIndexError SubString("∀∀", 1:5)
+        @test_throws StringIndexError SubString("∀∀", 2:4)
+        @test_throws BoundsError SubString("∀∀", 0:1)
+        @test_throws BoundsError SubString("∀∀", 0:4)
+        @test_throws BoundsError SubString("∀∀", 1:7)
+        @test_throws BoundsError SubString("∀∀", 4:7)
+    end
 
-let str="tempus fugit"              #length(str)==12
-    ss=SubString(str,1,lastindex(str)) #match source string
-    @test length(ss)==length(str)
+    # tests for SubString of more than one multibyte `Char` string
+    # we are consistent with `getindex` for `String`
+    for idx in [0, 1, 4]
+        @test SubString("∀∀", 1, idx) == "∀∀"[1:idx]
+        @test SubString("∀∀", 4, idx) == "∀∀"[4:idx]
+    end
 
-    ss=SubString(str,1:lastindex(str))
-    @test length(ss)==length(str)
+    @testset "index beyond lastindex(\"∀∀\")" begin
+        for idx in [2:3; 5:6]
+            @test_throws StringIndexError SubString("∀∀", 1, idx)
+        end
+        for idx in 7:8
+            @test_throws BoundsError SubString("∀∀", 1, idx)
+        end
+    end
 
-    ss=SubString(str,1,0)    #empty SubString
-    @test length(ss)==0
+    let str="tempus fugit"              #length(str)==12
+        ss=SubString(str,1,lastindex(str)) #match source string
+        @test length(ss)==length(str)
 
-    ss=SubString(str,1:0)
-    @test length(ss)==0
+        ss=SubString(str,1:lastindex(str))
+        @test length(ss)==length(str)
 
-    @test_throws BoundsError SubString(str, 14, 20)  #start indexing beyond source string length
-    @test_throws BoundsError SubString(str, 10, 16)  #end indexing beyond source string length
+        ss=SubString(str,1,0)    #empty SubString
+        @test length(ss)==0
 
-    @test_throws BoundsError SubString("", 1, 4)  #empty source string
-    @test_throws BoundsError SubString("", 1, 1)  #empty source string, identical start and end index
-    @test_throws BoundsError SubString("", 10, 12)
-    @test SubString("", 12, 10) == ""
-end
+        ss=SubString(str,1:0)
+        @test length(ss)==0
 
-@test SubString("foobar", big(1), big(3)) == "foo"
-
-let str = "aa\u2200\u2222bb"
-    u = SubString(str, 3, 6)
-    @test length(u) == 2
-    b = IOBuffer()
-    write(b, u)
-    @test String(take!(b)) == "\u2200\u2222"
-
-    @test_throws StringIndexError SubString(str, 4, 5)
-    @test_throws BoundsError iterate(u, 0)
-    @test_throws BoundsError iterate(u, 8)
-    @test_throws BoundsError getindex(u, 0)
-    @test_throws BoundsError getindex(u, 7)
-    @test_throws BoundsError getindex(u, 0:1)
-    @test_throws BoundsError getindex(u, 7:7)
-    @test reverseind(u, 1) == 4
-    @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String}
-    @test Base.cconvert(Ptr{Int8}, u) == u
-end
+        @test_throws BoundsError SubString(str, 14, 20)  #start indexing beyond source string length
+        @test_throws BoundsError SubString(str, 10, 16)  #end indexing beyond source string length
 
-let str = "føøbar"
-    @test_throws BoundsError SubString(str, 10, 10)
-    u = SubString(str, 4, 3)
-    @test length(u) == 0
-    b = IOBuffer()
-    write(b, u)
-    @test String(take!(b)) == ""
-end
+        @test_throws BoundsError SubString("", 1, 4)  #empty source string
+        @test_throws BoundsError SubString("", 1, 1)  #empty source string, identical start and end index
+        @test_throws BoundsError SubString("", 10, 12)
+        @test SubString("", 12, 10) == ""
+    end
 
-# search and SubString (issue #5679)
-let str = "Hello, world!"
-    u = SubString(str, 1, 5)
-    @test findlast("World", u) == nothing
-    @test findlast(isequal('z'), u) == nothing
-    @test findlast("ll", u) == 3:4
-end
+    @test SubString("foobar", big(1), big(3)) == "foo"
+
+    let str = "aa\u2200\u2222bb"
+        u = SubString(str, 3, 6)
+        @test length(u) == 2
+        b = IOBuffer()
+        write(b, u)
+        @test String(take!(b)) == "\u2200\u2222"
+
+        @test_throws StringIndexError SubString(str, 4, 5)
+        @test_throws BoundsError iterate(u, 0)
+        @test_throws BoundsError iterate(u, 8)
+        @test_throws BoundsError getindex(u, 0)
+        @test_throws BoundsError getindex(u, 7)
+        @test_throws BoundsError getindex(u, 0:1)
+        @test_throws BoundsError getindex(u, 7:7)
+        @test reverseind(u, 1) == 4
+        @test typeof(Base.cconvert(Ptr{Int8}, u)) == SubString{String}
+        @test Base.cconvert(Ptr{Int8}, u) == u
+    end
 
-# SubString created from SubString
-let str = "Hello, world!"
-    u = SubString(str, 2, 5)
-    for idx in 1:4
-        @test SubString(u, 2, idx) == u[2:idx]
-        @test SubString(u, 2:idx) == u[2:idx]
+    let str = "føøbar"
+        @test_throws BoundsError SubString(str, 10, 10)
+        u = SubString(str, 4, 3)
+        @test length(u) == 0
+        b = IOBuffer()
+        write(b, u)
+        @test String(take!(b)) == ""
     end
-    @test_throws BoundsError SubString(u, 1, 10)
-    @test_throws BoundsError SubString(u, 1:10)
-    @test_throws BoundsError SubString(u, 20:30)
-    @test SubString(u, 20:15) == ""
-    @test_throws BoundsError SubString(u, -1:10)
-    @test SubString(u, -1, -10) == ""
-    @test SubString(SubString("123", 1, 2), -10, -20) == ""
-end
 
-# sizeof
-@test sizeof(SubString("abc\u2222def",4,4)) == 3
-
-# issue #3710
-@test prevind(SubString("{var}",2,4),4) == 3
-
-# issue #4183
-@test split(SubString("x", 2, 0), "y") == [""]
-
-# issue #6772
-@test parse(Float64, SubString("10",1,1)) === 1.0
-@test parse(Float64, SubString("1 0",1,1)) === 1.0
-@test parse(Float32, SubString("10",1,1)) === 1.0f0
-
-# issue #5870
-@test !occursin(Regex("aa"), SubString("",1,0))
-@test occursin(Regex(""), SubString("",1,0))
-
-# isvalid, length, prevind, nextind for SubString{String}
-let s = "lorem ipsum", sdict = Dict(
-    SubString(s, 1, 11)  => "lorem ipsum",
-    SubString(s, 1, 6)   => "lorem ",
-    SubString(s, 1, 0)   => "",
-    SubString(s, 2, 4)   => "ore",
-    SubString(s, 2, 11)  => "orem ipsum",
-    SubString(s, 15, 14) => "",
-)
-    for (ss, s) in sdict
-        @test ncodeunits(ss) == ncodeunits(s)
-        for i in -2:13
-            @test isvalid(ss, i) == isvalid(s, i)
-        end
-        for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss)
-            @test length(ss, i, j) == length(s, i, j)
+    @testset "search and SubString (issue #5679)" begin
+        str = "Hello, world!"
+        u = SubString(str, 1, 5)
+        @test findlast("World", u) === nothing
+        @test findlast(isequal('z'), u) === nothing
+        @test findlast("ll", u) == 3:4
+    end
+
+    @testset "SubString created from SubString" begin
+        str = "Hello, world!"
+        u = SubString(str, 2, 5)
+        for idx in 1:4
+            @test SubString(u, 2, idx) == u[2:idx]
+            @test SubString(u, 2:idx) == u[2:idx]
         end
+        @test_throws BoundsError SubString(u, 1, 10)
+        @test_throws BoundsError SubString(u, 1:10)
+        @test_throws BoundsError SubString(u, 20:30)
+        @test SubString(u, 20:15) == ""
+        @test_throws BoundsError SubString(u, -1:10)
+        @test SubString(u, -1, -10) == ""
+        @test SubString(SubString("123", 1, 2), -10, -20) == ""
+    end
+
+    # sizeof
+    @test sizeof(SubString("abc\u2222def",4,4)) == 3
+
+    # issue #3710
+    @test prevind(SubString("{var}",2,4),4) == 3
+
+    # issue #4183
+    @test split(SubString("x", 2, 0), "y") == [""]
+
+    @testset "issue #6772" begin
+        @test parse(Float64, SubString("10",1,1)) === 1.0
+        @test parse(Float64, SubString("1 0",1,1)) === 1.0
+        @test parse(Float32, SubString("10",1,1)) === 1.0f0
     end
-    for (ss, s) in sdict
-        @test length(ss) == length(s)
-        for i in 0:ncodeunits(ss), j = 0:length(ss)+1
-            @test prevind(ss, i+1, j) == prevind(s, i+1, j)
-            @test nextind(ss, i, j) == nextind(s, i, j)
+
+    @testset "issue #5870" begin
+        @test !occursin(Regex("aa"), SubString("",1,0))
+        @test occursin(Regex(""), SubString("",1,0))
+    end
+    @testset" isvalid, length, prevind, nextind for SubString{String}" begin
+        s = "lorem ipsum"
+        sdict = Dict(
+            SubString(s, 1, 11)  => "lorem ipsum",
+            SubString(s, 1, 6)   => "lorem ",
+            SubString(s, 1, 0)   => "",
+            SubString(s, 2, 4)   => "ore",
+            SubString(s, 2, 11)  => "orem ipsum",
+            SubString(s, 15, 14) => "",
+        )
+        for (ss, s) in sdict
+            @test ncodeunits(ss) == ncodeunits(s)
+            for i in -2:13
+                @test isvalid(ss, i) == isvalid(s, i)
+            end
+            for i in 1:ncodeunits(ss), j = i-1:ncodeunits(ss)
+                @test length(ss, i, j) == length(s, i, j)
+            end
+        end
+        for (ss, s) in sdict
+            @test length(ss) == length(s)
+            for i in 0:ncodeunits(ss), j = 0:length(ss)+1
+                @test prevind(ss, i+1, j) == prevind(s, i+1, j)
+                @test nextind(ss, i, j) == nextind(s, i, j)
+            end
+            @test_throws BoundsError prevind(s, 0)
+            @test_throws BoundsError prevind(ss, 0)
+            @test_throws BoundsError nextind(s, ncodeunits(ss)+1)
+            @test_throws BoundsError nextind(ss, ncodeunits(ss)+1)
         end
-        @test_throws BoundsError prevind(s, 0)
-        @test_throws BoundsError prevind(ss, 0)
-        @test_throws BoundsError nextind(s, ncodeunits(ss)+1)
-        @test_throws BoundsError nextind(ss, ncodeunits(ss)+1)
     end
-end
 
-# proper nextind/prevind/thisind for SubString{String}
-let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀",
+    rng = MersenneTwister(1)
+    strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"∀∃∀",
                                       String(rand(rng, UInt8, 50))]
-    for s in strs
+    @testset "proper nextind/prevind/thisind for SubString{String}: $(repr(s))" for s in strs
         a = 0
         while a <= ncodeunits(s)
             a = nextind(s, a)
@@ -223,111 +238,115 @@ let rng = MersenneTwister(1), strs = ["∀∃∀"*String(rand(rng, UInt8, 40))*"
             end
         end
     end
-end
 
-# for isvalid(SubString{String})
-let s = "Σx + βz - 2"
-    for i in -1:ncodeunits(s)+2
-        if checkbounds(Bool, s, i)
-            if isvalid(s, i)
-                ss = SubString(s, 1, i)
-                for j = 1:ncodeunits(ss)
-                    @test isvalid(ss, j) == isvalid(s, j)
+    # for isvalid(SubString{String})
+    let s = "Σx + βz - 2"
+        for i in -1:ncodeunits(s)+2
+            if checkbounds(Bool, s, i)
+                if isvalid(s, i)
+                    ss = SubString(s, 1, i)
+                    for j = 1:ncodeunits(ss)
+                        @test isvalid(ss, j) == isvalid(s, j)
+                    end
+                else
+                    @test_throws StringIndexError SubString(s, 1, i)
                 end
+            elseif i > 0
+                @test_throws BoundsError SubString(s, 1, i)
             else
-                @test_throws StringIndexError SubString(s, 1, i)
+                @test SubString(s, 1, i) == ""
             end
-        elseif i > 0
-            @test_throws BoundsError SubString(s, 1, i)
-        else
-            @test SubString(s, 1, i) == ""
         end
     end
-end
 
-let ss = SubString("hello", 1, 5)
-    @test length(ss, 1, 0) == 0
-    @test_throws BoundsError length(ss, 1, -1)
-    @test_throws BoundsError length(ss, 1, 6)
-    @test_throws BoundsError length(ss, 1, 10)
-    @test_throws BoundsError prevind(ss, 0, 1)
-    @test prevind(ss, 1, 1) == 0
-    @test prevind(ss, 6, 1) == 5
-    @test_throws BoundsError prevind(ss, 7, 1)
-    @test_throws BoundsError nextind(ss, -1, 1)
-    @test nextind(ss, 0, 1) == 1
-    @test nextind(ss, 5, 1) == 6
-    @test_throws BoundsError nextind(ss, 6, 1)
-end
+    let ss = SubString("hello", 1, 5)
+        @test length(ss, 1, 0) == 0
+        @test_throws BoundsError length(ss, 1, -1)
+        @test_throws BoundsError length(ss, 1, 6)
+        @test_throws BoundsError length(ss, 1, 10)
+        @test_throws BoundsError prevind(ss, 0, 1)
+        @test prevind(ss, 1, 1) == 0
+        @test prevind(ss, 6, 1) == 5
+        @test_throws BoundsError prevind(ss, 7, 1)
+        @test_throws BoundsError nextind(ss, -1, 1)
+        @test nextind(ss, 0, 1) == 1
+        @test nextind(ss, 5, 1) == 6
+        @test_throws BoundsError nextind(ss, 6, 1)
+    end
 
-# length(SubString{String}) performance specialization
-let s = "|η(α)-ϕ(κ)| < ε"
-    @test length(SubString(s, 1, 0)) == length(s[1:0])
-    @test length(SubString(s, 4, 4)) == length(s[4:4])
-    @test length(SubString(s, 1, 7)) == length(s[1:7])
-    @test length(SubString(s, 4, 11)) == length(s[4:11])
-end
+    # length(SubString{String}) performance specialization
+    let s = "|η(α)-ϕ(κ)| < ε"
+        @test length(SubString(s, 1, 0)) == length(s[1:0])
+        @test length(SubString(s, 4, 4)) == length(s[4:4])
+        @test length(SubString(s, 1, 7)) == length(s[1:7])
+        @test length(SubString(s, 4, 11)) == length(s[4:11])
+    end
 
-@testset "reverseind" for T in (String, SubString, GenericString)
-    for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
-        for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
-            for c in ('X', 'δ', '\U0001d6a5')
-                s = convert(T, string(prefix, c, suffix))
-                r = reverse(s)
-                ri = findfirst(isequal(c), r)
-                @test c == s[reverseind(s, ri)] == r[ri]
-                s = convert(T, string(prefix, prefix, c, suffix, suffix))
-                pre = convert(T, prefix)
-                sb = SubString(s, nextind(pre, lastindex(pre)),
-                               lastindex(convert(T, string(prefix, prefix, c, suffix))))
-                r = reverse(sb)
-                ri = findfirst(isequal(c), r)
-                @test c == sb[reverseind(sb, ri)] == r[ri]
+    @testset "reverseind" for T in (String, SubString, GenericString)
+        for prefix in ("", "abcd", "\U0001d6a4\U0001d4c1", "\U0001d6a4\U0001d4c1c", " \U0001d6a4\U0001d4c1")
+            for suffix in ("", "abcde", "\U0001d4c1β\U0001d6a4", "\U0001d4c1β\U0001d6a4c", " \U0001d4c1β\U0001d6a4")
+                for c in ('X', 'δ', '\U0001d6a5')
+                    s = convert(T, string(prefix, c, suffix))
+                    r = reverse(s)
+                    ri = findfirst(isequal(c), r)
+                    @test c == s[reverseind(s, ri)] == r[ri]
+                    s = convert(T, string(prefix, prefix, c, suffix, suffix))
+                    pre = convert(T, prefix)
+                    sb = SubString(s, nextind(pre, lastindex(pre)),
+                                   lastindex(convert(T, string(prefix, prefix, c, suffix))))
+                    r = reverse(sb)
+                    ri = findfirst(isequal(c), r)
+                    @test c == sb[reverseind(sb, ri)] == r[ri]
+                end
             end
         end
     end
-end
 
-@testset "reverseind of empty strings" begin
-    for s in ("",
-              SubString("", 1, 0),
-              SubString("ab", 1, 0),
-              SubString("ab", 2, 1),
-              SubString("ab", 3, 2),
-              GenericString(""))
-        @test reverseind(s, 0) == 1
-        @test reverseind(s, 1) == 0
+    @testset "reverseind of empty strings" begin
+        for s in ("",
+                  SubString("", 1, 0),
+                  SubString("ab", 1, 0),
+                  SubString("ab", 2, 1),
+                  SubString("ab", 3, 2),
+                  GenericString(""))
+            @test reverseind(s, 0) == 1
+            @test reverseind(s, 1) == 0
+        end
     end
 end
 
-## Cstring tests ##
-
-# issue #13974: comparison against pointers
-let
-    str = String("foobar")
-    ptr = pointer(str)
-    cstring = Cstring(ptr)
-    @test ptr == cstring
-    @test cstring == ptr
-
-    # convenient NULL string creation from Ptr{Cvoid}
-    nullstr = Cstring(C_NULL)
-
-    # Comparisons against NULL strings
-    @test ptr != nullstr
-    @test nullstr != ptr
+@testset "Cstring" begin
+    @testset "issue #13974: comparison against pointers" begin
+        str = String("foobar")
+        ptr = pointer(str)
+        cstring = Cstring(ptr)
+        @test ptr == cstring
+        @test cstring == ptr
+
+        # convenient NULL string creation from Ptr{Cvoid}
+        nullstr = Cstring(C_NULL)
+
+        # Comparisons against NULL strings
+        @test ptr != nullstr
+        @test nullstr != ptr
+
+        # Short-hand comparison against C_NULL
+        @test nullstr == C_NULL
+        @test C_NULL == nullstr
+        @test cstring != C_NULL
+        @test C_NULL != cstring
+    end
 
-    # Short-hand comparison against C_NULL
-    @test nullstr == C_NULL
-    @test C_NULL == nullstr
-    @test cstring != C_NULL
-    @test C_NULL != cstring
+    @testset "issue #31381: eltype(Cstring) != Cchar" begin
+        s = Cstring(C_NULL)
+        @test eltype(Cstring) == Cchar
+        @test eltype(s) == Cchar
+        @test pointer(s) isa Ptr{Cchar}
+    end
 end
 
-# issue #31381: eltype(Cstring) != Cchar
-let
-    s = Cstring(C_NULL)
-    @test eltype(Cstring) == Cchar
-    @test eltype(s) == Cchar
-    @test pointer(s) isa Ptr{Cchar}
+@testset "Codeunits" begin
+    s = "I'm a string!"
+    @test codeunit(s) == UInt8
+    @test codeunit(s, Int8(1)) == codeunit(s, 1)
 end
diff --git a/test/strings/util.jl b/test/strings/util.jl
index 59638dc3b9ca6..ae16e24f4ea8b 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -2,6 +2,20 @@
 
 SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
 
+@testset "textwidth" begin
+    for (c, w) in [('x', 1), ('α', 1), ('🍕', 2), ('\0', 0), ('\u0302', 0), ('\xc0', 1)]
+        @test textwidth(c) == w
+        @test textwidth(c^3) == w*3
+        @test w == @invoke textwidth(c::AbstractChar)
+    end
+    for i in 0x00:0x7f # test all ASCII chars (which have fast path)
+        w = Int(ccall(:utf8proc_charwidth, Cint, (UInt32,), i))
+        c = Char(i)
+        @test textwidth(c) == w
+        @test w == @invoke textwidth(c::AbstractChar)
+    end
+end
+
 @testset "padding (lpad and rpad)" begin
     @test lpad("foo", 2) == "foo"
     @test rpad("foo", 2) == "foo"
@@ -53,6 +67,52 @@ SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
     @test rpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
 end
 
+@testset "string truncation (ltruncate, rtruncate, ctruncate)" begin
+    @test ltruncate("foo", 4) == "foo"
+    @test ltruncate("foo", 3) == "foo"
+    @test ltruncate("foo", 2) == "…o"
+    @test ltruncate("🍕🍕 I love 🍕", 10) == "…I love 🍕" # handle wide emojis
+    @test ltruncate("🍕🍕 I love 🍕", 10, "[…]") == "[…]love 🍕"
+    # when the replacement string is longer than the trunc
+    # trust that the user wants the replacement string rather than erroring
+    @test ltruncate("abc", 2, "xxxxxx") == "xxxxxx"
+
+    @inferred ltruncate("xxx", 4)
+    @inferred ltruncate("xxx", 2)
+    @inferred ltruncate(@view("xxxxxxx"[1:4]), 4)
+    @inferred ltruncate(@view("xxxxxxx"[1:4]), 2)
+
+    @test rtruncate("foo", 4) == "foo"
+    @test rtruncate("foo", 3) == "foo"
+    @test rtruncate("foo", 2) == "f…"
+    @test rtruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 I lo…"
+    @test rtruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕 I […]"
+    @test rtruncate("abc", 2, "xxxxxx") == "xxxxxx"
+
+    @inferred rtruncate("xxx", 4)
+    @inferred rtruncate("xxx", 2)
+    @inferred rtruncate(@view("xxxxxxx"[1:4]), 4)
+    @inferred rtruncate(@view("xxxxxxx"[1:4]), 2)
+
+    @test ctruncate("foo", 4) == "foo"
+    @test ctruncate("foo", 3) == "foo"
+    @test ctruncate("foo", 2) == "f…"
+    @test ctruncate("foo", 2; prefer_left=true) == "f…"
+    @test ctruncate("foo", 2; prefer_left=false) == "…o"
+    @test ctruncate("foobar", 6) == "foobar"
+    @test ctruncate("foobar", 5) == "fo…ar"
+    @test ctruncate("foobar", 4) == "fo…r"
+    @test ctruncate("🍕🍕 I love 🍕", 10) == "🍕🍕 …e 🍕"
+    @test ctruncate("🍕🍕 I love 🍕", 10, "[…]") == "🍕🍕[…] 🍕"
+    @test ctruncate("abc", 2, "xxxxxx") == "xxxxxx"
+    @test ctruncate("🍕🍕🍕🍕🍕🍕xxxxxxxxxxx", 9) == "🍕🍕…xxxx"
+
+    @inferred ctruncate("xxxxx", 5)
+    @inferred ctruncate("xxxxx", 3)
+    @inferred ctruncate(@view("xxxxxxx"[1:5]), 5)
+    @inferred ctruncate(@view("xxxxxxx"[1:5]), 3)
+end
+
 # string manipulation
 @testset "lstrip/rstrip/strip" begin
     @test strip("") == ""
diff --git a/test/subtype.jl b/test/subtype.jl
index c26f4fc9d30e2..7be869107b432 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -707,16 +707,17 @@ macro testintersect(a, b, result)
     a = esc(a)
     b = esc(b)
     result = esc(result)
-    Base.remove_linenums!(quote
+    # use a manual macrocall expression since Test will examine this __source__ value
+    return quote
         # test real intersect
-        @test $cmp(_type_intersect($a, $b), $result)
-        @test $cmp(_type_intersect($b, $a), $result)
+        $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($a, $b), $result))))
+        $(Expr(:macrocall, :var"@test", __source__, :($cmp(_type_intersect($b, $a), $result))))
         # test simplified intersect
         if !($result === Union{})
-            @test typeintersect($a, $b) != Union{}
-            @test typeintersect($b, $a) != Union{}
+            $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($a, $b) != Union{})))
+            $(Expr(:macrocall, :var"@test", __source__, :(typeintersect($b, $a) != Union{})))
         end
-    end)
+    end
 end
 
 abstract type IT4805_2{N, T} end
@@ -2267,31 +2268,46 @@ let S = Tuple{Integer, U} where {II<:Array, U<:Tuple{Vararg{II, 1}}}
     @testintersect(S, Tuple{Int, U} where {N, U<:Tuple{Any,Any,Vararg{Any,N}}}, Union{})
 end
 
+function equal_envs(env1, env2)
+    length(env1) == length(env2) || return false
+    for i = 1:length(env1)
+        a = env1[i]
+        b = env2[i]
+        if a isa TypeVar
+            if !(b isa TypeVar && a.name == b.name && a.lb == b.lb && a.ub == b.ub)
+                return false
+            end
+        elseif !(a == b)
+            return false
+        end
+    end
+    return true
+end
+
 # issue #43064
 let
-    env_tuple(@nospecialize(x), @nospecialize(y)) = (intersection_env(x, y)[2]...,)
-    all_var(x::UnionAll) = (x.var, all_var(x.body)...)
-    all_var(x::DataType) = ()
+    env_tuple(@nospecialize(x), @nospecialize(y)) = intersection_env(x, y)[2]
     TT0 = Tuple{Type{T},Union{Real,Missing,Nothing}} where {T}
     TT1 = Union{Type{Int8},Type{Int16}}
     @test env_tuple(Tuple{TT1,Missing}, TT0) ===
           env_tuple(Tuple{TT1,Nothing}, TT0) ===
-          env_tuple(Tuple{TT1,Int}, TT0) === all_var(TT0)
+          env_tuple(Tuple{TT1,Int}, TT0) ===
+          Core.svec(TT0.var)
 
     TT0 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T1,T2}
     TT1 = Tuple{T1,T2,Union{Real,Missing,Nothing}} where {T2,T1}
     TT2 = Tuple{Union{Int,Int8},Union{Int,Int8},Int}
     TT3 = Tuple{Int,Union{Int,Int8},Int}
-    @test env_tuple(TT2, TT0) === all_var(TT0)
-    @test env_tuple(TT2, TT1) === all_var(TT1)
-    @test env_tuple(TT3, TT0) === Base.setindex(all_var(TT0), Int, 1)
-    @test env_tuple(TT3, TT1) === Base.setindex(all_var(TT1), Int, 2)
+    @test equal_envs(env_tuple(TT2, TT0), Core.svec(TypeVar(:T1, Union{Int, Int8}), TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), TypeVar(:T1, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT3, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT3, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int))
 
     TT0 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T1,T2}
     TT1 = Tuple{T1,T2,T1,Union{Real,Missing,Nothing}} where {T2,T1}
     TT2 = Tuple{Int,Union{Int,Int8},Int,Int}
-    @test env_tuple(TT2, TT0) === Base.setindex(all_var(TT0), Int, 1)
-    @test env_tuple(TT2, TT1) === Base.setindex(all_var(TT1), Int, 2)
+    @test equal_envs(env_tuple(TT2, TT0), Core.svec(Int, TypeVar(:T2, Union{Int, Int8})))
+    @test equal_envs(env_tuple(TT2, TT1), Core.svec(TypeVar(:T2, Union{Int, Int8}), Int))
 end
 
 #issue #46735
@@ -2380,12 +2396,41 @@ let S = Tuple{T2, V2} where {T2, N2, V2<:(Array{S2, N2} where {S2 <: T2})},
     @testintersect(S, T, !Union{})
 end
 
-# A simple case which has a small local union.
-# make sure the env is not widened too much when we intersect(Int8, Int8).
-struct T48006{A1,A2,A3} end
-@testintersect(Tuple{T48006{Float64, Int, S1}, Int} where {F1<:Real, S1<:Union{Int8, Val{F1}}},
-               Tuple{T48006{F2, I, S2}, I} where {F2<:Real, I<:Int, S2<:Union{Int8, Val{F2}}},
-               Tuple{T48006{Float64, Int, S1}, Int} where S1<:Union{Val{Float64}, Int8})
+let S = Dict{Int, S1} where {F1, S1<:Union{Int8, Val{F1}}},
+    T = Dict{F2, S2} where {F2, S2<:Union{Int8, Val{F2}}}
+    @test_broken typeintersect(S, T) == Dict{Int, S} where S<:Union{Val{Int}, Int8}
+    @test typeintersect(T, S) == Dict{Int, S} where S<:Union{Val{Int}, Int8}
+end
+
+# Ensure inner `intersect_all` never under-esitimate.
+let S = Tuple{F1, Dict{Int, S1}} where {F1, S1<:Union{Int8, Val{F1}}},
+    T = Tuple{Any, Dict{F2, S2}} where {F2, S2<:Union{Int8, Val{F2}}}
+    @test Tuple{Nothing, Dict{Int, Int8}} <: S
+    @test Tuple{Nothing, Dict{Int, Int8}} <: T
+    @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(S, T)
+    @test Tuple{Nothing, Dict{Int, Int8}} <: typeintersect(T, S)
+end
+
+let S = Tuple{F1, Val{S1}} where {F1, S1<:Dict{F1}}
+    T = Tuple{Any, Val{S2}} where {F2, S2<:Union{map(T->Dict{T}, Base.BitInteger_types)...}}
+    ST = typeintersect(S, T)
+    TS = typeintersect(S, T)
+    for U in Base.BitInteger_types
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: S
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: T
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: ST
+        @test Tuple{U, Val{Dict{U,Nothing}}} <: TS
+    end
+end
+
+#issue 55206
+struct T55206{A,B<:Complex{A},C<:Union{Dict{Nothing},Dict{A}}} end
+@testintersect(T55206, T55206{<:Any,<:Any,<:Dict{Nothing}}, T55206{A,<:Complex{A},<:Dict{Nothing}} where {A})
+@testintersect(
+    Tuple{Dict{Int8, Int16}, Val{S1}} where {F1, S1<:AbstractSet{F1}},
+    Tuple{Dict{T1, T2}, Val{S2}} where {T1, T2, S2<:Union{Set{T1},Set{T2}}},
+    Tuple{Dict{Int8, Int16}, Val{S1}} where {S1<:Union{Set{Int8},Set{Int16}}}
+)
 
 f48167(::Type{Val{L2}}, ::Type{Union{Val{L1}, Set{R}}}) where {L1, R, L2<:L1} = 1
 f48167(::Type{Val{L1}}, ::Type{Union{Val{L2}, Set{R}}}) where {L1, R, L2<:L1} = 2
@@ -2554,7 +2599,7 @@ end
 let T = Tuple{Union{Type{T}, Type{S}}, Union{Val{T}, Val{S}}, Union{Val{T}, S}} where T<:Val{A} where A where S<:Val,
     S = Tuple{Type{T}, T, Val{T}} where T<:(Val{S} where S<:Val)
     # optimal = Union{}?
-    @test typeintersect(T, S) == Tuple{Type{A}, Union{Val{A}, Val{S} where S<:Union{Val, A}, Val{x} where x<:Val, Val{x} where x<:Union{Val, A}}, Val{A}} where A<:(Val{S} where S<:Val)
+    @test typeintersect(T, S) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {S<:Val, T<:Val}
     @test typeintersect(S, T) == Tuple{Type{T}, Union{Val{T}, Val{S}}, Val{T}} where {T<:Val, S<:(Union{Val{A}, Val} where A)}
 end
 
@@ -2657,3 +2702,22 @@ let S = Tuple{Val{<:T}, Union{Int,T}} where {T},
     @testintersect(S, T, !Union{})
     @test !Base.has_free_typevars(typeintersect(S, T))
 end
+
+#issue 55230
+let T1 = NTuple{12, Union{Val{1}, Val{2}, Val{3}, Val{4}, Val{5}, Val{6}}}
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Val}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Real}
+    @test !(T1 <: T2)
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{Val,Real}}
+    @test T1 <: T2
+    T2 = Tuple{<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Union{String,Real}}
+    @test !(T1 <: T2)
+    T2 = Tuple{<:Union{Val,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test T1 <: T2
+    T2 = Tuple{<:Union{String,Real},<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any,<:Any}
+    @test !(T1 <: T2)
+    @test Tuple{Union{Val{1},Val{2}}} <: Tuple{S} where {T, S<:Val{T}}
+end
diff --git a/test/syntax.jl b/test/syntax.jl
index 0855c643e1423..c19721b5c54b3 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -2645,9 +2645,9 @@ end
 @test_throws ErrorException("invalid method definition in Mod3: function Mod3.f must be explicitly imported to be extended") Core.eval(Mod3, :(f(x::Int) = x))
 @test !isdefined(Mod3, :always_undef) # resolve this binding now in Mod3
 @test_throws ErrorException("invalid method definition in Mod3: exported function Mod.always_undef does not exist") Core.eval(Mod3, :(always_undef(x::Int) = x))
-@test_throws ErrorException("cannot assign a value to imported variable Mod.always_undef from module Mod3") Core.eval(Mod3, :(const always_undef = 3))
-@test_throws ErrorException("cannot assign a value to imported variable Mod3.f") Core.eval(Mod3, :(const f = 3))
-@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it already has a value") Core.eval(Mod, :(const maybe_undef = 3))
+@test_throws ErrorException("cannot declare Mod3.always_undef constant; it was already declared as an import") Core.eval(Mod3, :(const always_undef = 3))
+@test_throws ErrorException("cannot declare Mod3.f constant; it was already declared as an import") Core.eval(Mod3, :(const f = 3))
+@test_throws ErrorException("cannot declare Mod.maybe_undef constant; it was already declared global") Core.eval(Mod, :(const maybe_undef = 3))
 
 z = 42
 import .z as also_z
@@ -3704,7 +3704,8 @@ end
 module Foreign54607
     # Syntactic, not dynamic
     try_to_create_binding1() = (Foreign54607.foo = 2)
-    @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo)) = 2)
+    # GlobalRef is allowed for same-module assignment
+    @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo2)) = 2)
     function global_create_binding()
         global bar
         bar = 3
@@ -3719,6 +3720,11 @@ end
 @test_throws ErrorException (Foreign54607.foo = 1)
 @test_throws ErrorException Foreign54607.try_to_create_binding1()
 @test_throws ErrorException Foreign54607.try_to_create_binding2()
+function assign_in_foreign_module()
+    (Foreign54607.foo = 1)
+    nothing
+end
+@test !Core.Compiler.is_nothrow(Base.infer_effects(assign_in_foreign_module))
 @test_throws ErrorException begin
     @Base.Experimental.force_compile
     (Foreign54607.foo = 1)
@@ -3904,3 +3910,80 @@ module ExtendedIsDefined
         @test !$(Expr(:isdefined, GlobalRef(@__MODULE__, :x4), false))
     end
 end
+
+# Test importing the same module twice using two different paths
+module FooDualImport
+end
+module BarDualImport
+import ..FooDualImport
+import ..FooDualImport.FooDualImport
+end
+
+# Test trying to define a constant and then importing the same constant
+const ImportConstant = 1
+module ImportConstantTestModule
+    using Test
+    const ImportConstant = 1
+    import ..ImportConstant
+    @test ImportConstant == 1
+    @test isconst(@__MODULE__, :ImportConstant)
+end
+
+# Test trying to define a constant and then trying to assign to the same value
+module AssignConstValueTest
+    const x = 1
+    x = 1
+end
+@test isconst(AssignConstValueTest, :x)
+
+# Module Replacement
+module ReplacementContainer
+    module ReplaceMe
+        const x = 1
+    end
+    const Old = ReplaceMe
+    module ReplaceMe
+        const x = 2
+    end
+end
+@test ReplacementContainer.Old !== ReplacementContainer.ReplaceMe
+@test ReplacementContainer.ReplaceMe.x === 2
+
+# Setglobal of previously declared global
+module DeclareSetglobal
+    using Test
+    @test_throws ErrorException setglobal!(@__MODULE__, :DeclareMe, 1)
+    global DeclareMe
+    setglobal!(@__MODULE__, :DeclareMe, 1)
+    @test DeclareMe === 1
+end
+
+# Binding type of const (N.B.: This may change in the future)
+module ConstBindingType
+    using Test
+    const x = 1
+    @test Core.get_binding_type(@__MODULE__, :x) === Any
+end
+
+# Explicit import may resolve using failed
+module UsingFailedExplicit
+    using Test
+    module A; export x; x = 1; end
+    module B; export x; x = 2; end
+    using .A, .B
+    @test_throws UndefVarError x
+    using .A: x as x
+    @test x === 1
+end
+
+# issue #45494
+begin
+  local b::Tuple{<:Any} = (0,)
+  function f45494()
+    b = b
+    b
+  end
+end
+@test f45494() === (0,)
+
+@test_throws "\"esc(...)\" used outside of macro expansion" eval(esc(:(const x=1)))
diff --git a/test/testdefs.jl b/test/testdefs.jl
index b96c95045f2bd..eb0bf570b11fd 100644
--- a/test/testdefs.jl
+++ b/test/testdefs.jl
@@ -23,7 +23,7 @@ function runtests(name, path, isolate=true; seed=nothing)
         end
         res_and_time_data = @timed @testset "$name" begin
             # Random.seed!(nothing) will fail
-            seed != nothing && Random.seed!(seed)
+            seed !== nothing && Random.seed!(seed)
 
             original_depot_path = copy(Base.DEPOT_PATH)
             original_load_path = copy(Base.LOAD_PATH)
diff --git a/test/testhelpers/ChallengePrompts.jl b/test/testhelpers/ChallengePrompts.jl
new file mode 100644
index 0000000000000..10dd1553afbbd
--- /dev/null
+++ b/test/testhelpers/ChallengePrompts.jl
@@ -0,0 +1,123 @@
+module ChallengePrompts
+
+include("FakePTYs.jl")
+using .FakePTYs: with_fake_pty
+using Serialization: serialize, deserialize
+
+const timeout = 60
+
+"""
+    challenge_prompt(code::Expr, challenges; pkgs=[])
+
+Execute the passed code in a separate process, looking for
+the passed prompts and responding as defined in the pairs of
+(prompt, response) in the collection of challenges.
+
+Optionally `import` the given `pkgs`.
+
+Returns the value of the last expression.
+"""
+function challenge_prompt(code::Expr, challenges; pkgs=[])
+    input_code = tempname()
+    open(input_code, "w") do fp
+        serialize(fp, code)
+    end
+    output_file = tempname()
+    torun = """
+        $(isempty(pkgs) ? "" : string("import ", join(pkgs, ", ")))
+        using Serialization
+        result = open($(repr(input_code))) do fp
+            eval(deserialize(fp))
+        end
+        open($(repr(output_file)), "w") do fp
+            serialize(fp, result)
+        end"""
+    cmd = `$(Base.julia_cmd()) --startup-file=no -e $torun`
+    try
+        challenge_prompt(cmd, challenges)
+        return open(output_file, "r") do fp
+            deserialize(fp)
+        end
+    finally
+        isfile(output_file) && rm(output_file)
+        isfile(input_code) && rm(input_code)
+    end
+    return nothing
+end
+
+function challenge_prompt(cmd::Cmd, challenges)
+    function format_output(output)
+        str = read(seekstart(output), String)
+        isempty(str) && return ""
+        return "Process output found:\n\"\"\"\n$str\n\"\"\""
+    end
+    out = IOBuffer()
+    with_fake_pty() do pts, ptm
+        p = run(detach(cmd), pts, pts, pts, wait=false) # getpass uses stderr by default
+        Base.close_stdio(pts)
+
+        # Kill the process if it takes too long. Typically occurs when process is waiting
+        # for input.
+        timer = Channel{Symbol}(1)
+        watcher = @async begin
+            waited = 0
+            while waited < timeout && process_running(p)
+                sleep(1)
+                waited += 1
+            end
+
+            if process_running(p)
+                kill(p)
+                put!(timer, :timeout)
+            elseif success(p)
+                put!(timer, :success)
+            else
+                put!(timer, :failure)
+            end
+
+            # SIGKILL stubborn processes
+            if process_running(p)
+                sleep(3)
+                process_running(p) && kill(p, Base.SIGKILL)
+            end
+            wait(p)
+        end
+
+        wroteall = false
+        try
+            for (challenge, response) in challenges
+                write(out, readuntil(ptm, challenge, keep=true))
+                if !isopen(ptm)
+                    error("Could not locate challenge: \"$challenge\". ",
+                          format_output(out))
+                end
+                write(ptm, response)
+            end
+            wroteall = true
+
+            # Capture output from process until `pts` is closed
+            write(out, ptm)
+        catch ex
+            if !(wroteall && ex isa Base.IOError && ex.code == Base.UV_EIO)
+                # ignore EIO from `ptm` after `pts` dies
+                error("Process failed possibly waiting for a response. ",
+                      format_output(out))
+            end
+        end
+
+        status = fetch(timer)
+        close(ptm)
+        if status !== :success
+            if status === :timeout
+                error("Process timed out possibly waiting for a response. ",
+                      format_output(out))
+            else
+                error("Failed process. ", format_output(out), "\n", p)
+            end
+        end
+        wait(watcher)
+    end
+    nothing
+end
+
+end
diff --git a/test/testhelpers/FakePTYs.jl b/test/testhelpers/FakePTYs.jl
index c592699440ee0..56ce6dc7d3a49 100644
--- a/test/testhelpers/FakePTYs.jl
+++ b/test/testhelpers/FakePTYs.jl
@@ -1,5 +1,4 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
-
 module FakePTYs
 
 if Sys.iswindows()
@@ -24,10 +23,7 @@ function open_fake_pty()
         close(pts)
         pts = fds
         # convert pts handle to a TTY
-        #fds = pts.handle
-        #pts.status = Base.StatusClosed
-        #pts.handle = C_NULL
-        #pts = Base.TTY(fds, Base.StatusOpen)
+        #pts = open(fds)::Base.TTY
     else
         O_RDWR = Base.Filesystem.JL_O_RDWR
         O_NOCTTY = Base.Filesystem.JL_O_NOCTTY
@@ -44,8 +40,9 @@ function open_fake_pty()
         pts = RawFD(fds)
 
         # pts = fdio(fds, true)
-        # pts = Base.Filesystem.File(RawFD(fds))
-        # pts = Base.TTY(RawFD(fds); readable = false)
+        # pts = Base.Filesystem.File(pts)
+        # pts = Base.TTY(pts)
+        # pts = Base.open(pts)
         ptm = Base.TTY(RawFD(fdm))
     end
     return pts, ptm
diff --git a/test/testhelpers/OffsetDenseArrays.jl b/test/testhelpers/OffsetDenseArrays.jl
new file mode 100644
index 0000000000000..44a1b8d627800
--- /dev/null
+++ b/test/testhelpers/OffsetDenseArrays.jl
@@ -0,0 +1,31 @@
+"""
+    module OffsetDenseArrays
+
+A minimal implementation of an offset array which is also <: DenseArray.
+"""
+module OffsetDenseArrays
+
+struct OffsetDenseArray{A <: DenseVector, T} <: DenseVector{T}
+    x::A
+    offset::Int
+end
+OffsetDenseArray(x::AbstractVector{T}, i::Integer) where {T} = OffsetDenseArray{typeof(x), T}(x, Int(i))
+
+Base.size(x::OffsetDenseArray) = size(x.x)
+Base.pointer(x::OffsetDenseArray) = pointer(x.x)
+
+function Base.getindex(x::OffsetDenseArray, i::Integer)
+    @boundscheck checkbounds(x.x, i - x.offset)
+    x.x[i - x.offset]
+end
+
+function Base.setindex(x::OffsetDenseArray, v, i::Integer)
+    @boundscheck checkbounds(x.x, i - x.offset)
+    x.x[i - x.offset] = v
+end
+
+IndexStyle(::Type{<:OffsetDenseArray}) = Base.IndexLinear()
+Base.axes(x::OffsetDenseArray) = (x.offset + 1 : x.offset + length(x.x),)
+Base.keys(x::OffsetDenseArray) = only(axes(x))
+
+end # module
diff --git a/test/testhelpers/SizedArrays.jl b/test/testhelpers/SizedArrays.jl
index bc02fb5cbbd20..e52e965a64859 100644
--- a/test/testhelpers/SizedArrays.jl
+++ b/test/testhelpers/SizedArrays.jl
@@ -36,10 +36,16 @@ struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}
         SZ == size(data) || throw(ArgumentError("size mismatch!"))
         new{SZ,T,N,A}(A(data))
     end
+    function SizedArray{SZ,T,N}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}}
+        SizedArray{SZ,T,N,A}(data)
+    end
+    function SizedArray{SZ,T}(data::A) where {SZ,T,N,A<:AbstractArray{T,N}}
+        SizedArray{SZ,T,N,A}(data)
+    end
 end
 SizedMatrix{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,2,A}
 SizedVector{SZ,T,A<:AbstractArray} = SizedArray{SZ,T,1,A}
-Base.convert(::Type{SizedArray{SZ,T,N,A}}, data::AbstractArray) where {SZ,T,N,A} = SizedArray{SZ,T,N,A}(data)
+Base.convert(::Type{S}, data::AbstractArray) where {S<:SizedArray} = data isa S ? data : S(data)
 
 # Minimal AbstractArray interface
 Base.size(a::SizedArray) = size(typeof(a))
@@ -64,6 +70,10 @@ function Base.similar(::Type{A}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {A<
     R = similar(A, length.(shape))
     SizedArray{length.(shape)}(R)
 end
+function Base.similar(x::SizedArray, ::Type{T}, shape::Tuple{SOneTo, Vararg{SOneTo}}) where {T}
+    sz = map(length, shape)
+    SizedArray{sz}(similar(parent(x), T, sz))
+end
 
 const SizedMatrixLike = Union{SizedMatrix, Transpose{<:Any, <:SizedMatrix}, Adjoint{<:Any, <:SizedMatrix}}
 
@@ -89,4 +99,7 @@ mul!(dest::AbstractMatrix, S1::SizedMatrix, S2::SizedMatrix, α::Number, β::Num
 mul!(dest::AbstractVector, M::AbstractMatrix, v::SizedVector, α::Number, β::Number) =
     mul!(dest, M, _data(v), α, β)
 
+LinearAlgebra.zeroslike(::Type{S}, ax::Tuple{SizedArrays.SOneTo, Vararg{SizedArrays.SOneTo}}) where {S<:SizedArray} =
+            zeros(eltype(S), ax)
+
 end
diff --git a/test/testhelpers/just_module.jl b/test/testhelpers/just_module.jl
new file mode 100644
index 0000000000000..71bd87e660eae
--- /dev/null
+++ b/test/testhelpers/just_module.jl
@@ -0,0 +1 @@
+@__MODULE__
diff --git a/test/threads.jl b/test/threads.jl
index 53dacdfe19288..6265368c2ac79 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -288,18 +288,16 @@ close(proc.in)
         proc = run(cmd; wait = false)
         done = Threads.Atomic{Bool}(false)
         timeout = false
-        timer = Timer(150) do _
+        timer = Timer(200) do _
             timeout = true
-            for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL]
-                for _ in 1:1000
+            for sig in (Base.SIGQUIT, Base.SIGKILL)
+                for _ in 1:3
                     kill(proc, sig)
+                    sleep(1)
                     if done[]
-                        if sig != Base.SIGTERM
-                            @warn "Terminating `$script` required signal $sig"
-                        end
+                        @warn "Terminating `$script` required signal $sig"
                         return
                     end
-                    sleep(0.001)
                 end
             end
         end
@@ -309,16 +307,11 @@ close(proc.in)
             done[] = true
             close(timer)
         end
-        if ( !success(proc) ) || ( timeout )
+        if !success(proc) || timeout
             @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout
         end
-        if Sys.iswindows() || Sys.isapple()
-            # Known failure: https://github.com/JuliaLang/julia/issues/43124
-            @test_skip success(proc)
-        else
-            @test success(proc)
-            @test !timeout
-        end
+        @test success(proc)
+        @test !timeout
     end
 end
 
@@ -357,12 +350,27 @@ end
 
 @testset "jl_*affinity" begin
     cpumasksize = @ccall uv_cpumask_size()::Cint
-    if !Sys.iswindows() && cpumasksize > 0 # otherwise affinities are not supported on the platform (UV_ENOTSUP)
-        mask = zeros(Cchar, cpumasksize);
+    if cpumasksize > 0 # otherwise affinities are not supported on the platform (UV_ENOTSUP)
         jl_getaffinity = (tid, mask, cpumasksize) -> ccall(:jl_getaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize)
         jl_setaffinity = (tid, mask, cpumasksize) -> ccall(:jl_setaffinity, Int32, (Int16, Ptr{Cchar}, Int32), tid, mask, cpumasksize)
-        @test jl_getaffinity(1, mask, cpumasksize) == 0
-        fill!(mask, 1)
-        @test jl_setaffinity(1, mask, cpumasksize) == 0
+        mask = zeros(Cchar, cpumasksize)
+        @test jl_getaffinity(0, mask, cpumasksize) == 0
+        @test !all(iszero, mask)
+        @test jl_setaffinity(0, mask, cpumasksize) == 0
+    end
+end
+
+# Make sure default number of BLAS threads respects CPU affinity: issue #55572.
+@testset "LinearAlgebra number of default threads" begin
+    if AFFINITY_SUPPORTED
+        allowed_cpus = findall(uv_thread_getaffinity())
+        cmd = addenv(`$(Base.julia_cmd()) --startup-file=no -E 'using LinearAlgebra; BLAS.get_num_threads()'`,
+                     # Remove all variables which could affect the default number of threads
+                     "OPENBLAS_NUM_THREADS"=>nothing,
+                     "GOTO_NUM_THREADS"=>nothing,
+                     "OMP_NUM_THREADS"=>nothing)
+        for n in 1:min(length(allowed_cpus), 8) # Cap to 8 to avoid too many tests on large systems
+            @test readchomp(setcpuaffinity(cmd, allowed_cpus[1:n])) == string(max(1, n ÷ 2))
+        end
     end
 end
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index 595f8991d58d7..ac54dd009390c 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -1235,6 +1235,7 @@ end
             @testset "throw=true" begin
                 tasks, event = create_tasks()
                 push!(tasks, Threads.@spawn error("Error"))
+                wait(tasks[end]; throw=false)
 
                 @test_throws CompositeException begin
                     waitany(convert_tasks(tasks_type, tasks); throw=true)
diff --git a/test/trimming/Makefile b/test/trimming/Makefile
new file mode 100644
index 0000000000000..c6e105d637013
--- /dev/null
+++ b/test/trimming/Makefile
@@ -0,0 +1,55 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# This Makefile template requires the following variables to be set
+# in the environment or on the command-line:
+#   JULIA: path to julia[.exe] executable
+#   BIN:   binary build directory
+
+ifndef JULIA
+  $(error "Please pass JULIA=[path of target julia binary], or set as environment variable!")
+endif
+ifndef BIN
+  $(error "Please pass BIN=[path of build directory], or set as environment variable!")
+endif
+
+#=============================================================================
+# location of test source
+SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+JULIAHOME := $(abspath $(SRCDIR)/../..)
+BUILDSCRIPT := $(BIN)/../share/julia/juliac-buildscript.jl
+include $(JULIAHOME)/Make.inc
+
+# get the executable suffix, if any
+EXE := $(suffix $(abspath $(JULIA)))
+
+# get compiler and linker flags. (see: `contrib/julia-config.jl`)
+JULIA_CONFIG := $(JULIA) -e 'include(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "julia-config.jl"))' --
+CPPFLAGS_ADD :=
+CFLAGS_ADD = $(shell $(JULIA_CONFIG) --cflags)
+LDFLAGS_ADD = -lm $(shell $(JULIA_CONFIG) --ldflags --ldlibs) -ljulia-internal
+
+#=============================================================================
+
+release: hello$(EXE)
+
+hello.o: $(SRCDIR)/hello.jl $(BUILDSCRIPT)
+	$(JULIA) -t 1 -J $(BIN)/../lib/julia/sys.so --startup-file=no --history-file=no --output-o $@ --output-incremental=no --strip-ir --strip-metadata --trim $(BUILDSCRIPT) $(SRCDIR)/hello.jl --output-exe true
+
+init.o: $(SRCDIR)/init.c
+	$(CC) -c -o $@ $< $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS)
+
+hello$(EXE): hello.o init.o
+	$(CC) -o $@ $(WHOLE_ARCHIVE) hello.o $(NO_WHOLE_ARCHIVE) init.o $(CPPFLAGS_ADD) $(CPPFLAGS) $(CFLAGS_ADD) $(CFLAGS) $(LDFLAGS_ADD) $(LDFLAGS)
+
+check: hello$(EXE)
+	$(JULIA) --depwarn=error $(SRCDIR)/../runtests.jl $(SRCDIR)/trimming
+
+clean:
+	-rm -f hello$(EXE) init.o hello.o
+
+.PHONY: release clean check
+
+# Makefile debugging trick:
+# call print-VARIABLE to see the runtime value of any variable
+print-%:
+	@echo '$*=$($*)'
diff --git a/test/trimming/hello.jl b/test/trimming/hello.jl
new file mode 100644
index 0000000000000..307bf820f325b
--- /dev/null
+++ b/test/trimming/hello.jl
@@ -0,0 +1,6 @@
+module MyApp
+Base.@ccallable function main()::Cint
+    println(Core.stdout, "Hello, world!")
+    return 0
+end
+end
diff --git a/test/trimming/init.c b/test/trimming/init.c
new file mode 100644
index 0000000000000..ea1b02f8e5c8f
--- /dev/null
+++ b/test/trimming/init.c
@@ -0,0 +1,9 @@
+#include <julia.h>
+
+__attribute__((constructor)) void static_init(void)
+{
+    if (jl_is_initialized())
+        return;
+    julia_init(JL_IMAGE_IN_MEMORY);
+    jl_exception_clear();
+}
diff --git a/test/trimming/trimming.jl b/test/trimming/trimming.jl
new file mode 100644
index 0000000000000..dfacae7f8e531
--- /dev/null
+++ b/test/trimming/trimming.jl
@@ -0,0 +1,7 @@
+using Test
+
+exe_path = joinpath(@__DIR__, "hello"*splitext(Base.julia_exename())[2])
+
+@test readchomp(`$exe_path`) == "Hello, world!"
+
+@test filesize(exe_path) < filesize(unsafe_string(Base.JLOptions().image_file))/10
diff --git a/test/tuple.jl b/test/tuple.jl
index b1894bd2bb6ce..355ad965f9584 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -533,7 +533,7 @@ end
         @test ntuple(identity, Val(n)) == ntuple(identity, n)
     end
 
-    @test Core.Compiler.return_type(ntuple, Tuple{typeof(identity), Val}) == Tuple{Vararg{Int}}
+    @test Base.infer_return_type(ntuple, Tuple{typeof(identity), Val}) == Tuple{Vararg{Int}}
 end
 
 struct A_15703{N}
@@ -835,8 +835,8 @@ end
     @test @inferred(Base.circshift(t3, 7)) == ('b', 'c', 'd', 'a')
     @test @inferred(Base.circshift(t3, -1)) == ('b', 'c', 'd', 'a')
     @test_throws MethodError circshift(t1, 'a')
-    @test Core.Compiler.return_type(circshift, Tuple{Tuple,Integer}) <: Tuple
-    @test Core.Compiler.return_type(circshift, Tuple{Tuple{Vararg{Any,10}},Integer}) <: Tuple{Vararg{Any,10}}
+    @test Base.infer_return_type(circshift, Tuple{Tuple,Integer}) <: Tuple
+    @test Base.infer_return_type(circshift, Tuple{Tuple{Vararg{Any,10}},Integer}) <: Tuple{Vararg{Any,10}}
     for len ∈ 0:5
         v = 1:len
         t = Tuple(v)